In [None]:
# === Copy all generated files to Google Drive ===
import shutil
from pathlib import Path

# Create a backup folder in Google Drive
drive_backup = Path('/content/drive/MyDrive/cacao_training_backup')
drive_backup.mkdir(exist_ok=True)

folders_to_backup = [
    'trained_models',
    'cacao_training/cacao_segmentation_v1',
    'test_predictions',
    'runs/segment',
]

files_to_backup = [
    'segmentation_metrics.json',
]

print("üì§ Copying files to Google Drive...")
for folder in folders_to_backup:
    if Path(folder).exists():
        dest = drive_backup / Path(folder).name
        if dest.exists():
            shutil.rmtree(dest)
        shutil.copytree(folder, dest)
        print(f"  ‚úÖ Copied: {folder} -> {dest}")
    else:
        print(f"  ‚ö†Ô∏è Not found: {folder}")

for file in files_to_backup:
    if Path(file).exists():
        shutil.copy(file, drive_backup / Path(file).name)
        print(f"  ‚úÖ Copied: {file}")
    else:
        print(f"  ‚ö†Ô∏è Not found: {file}")

print(f"\n‚úÖ All files backed up to: {drive_backup}")
print("You can access them from Google Drive > MyDrive > cacao_training_backup/")

In [None]:
# === Download as ZIP files ===
import zipfile
from google.colab import files
import os

def create_and_download_zip(folder_path, zip_name):
    """Create a ZIP file and download it"""
    if not os.path.exists(folder_path):
        print(f"‚ö†Ô∏è Folder not found: {folder_path}")
        return
    
    zip_path = f"{zip_name}.zip"
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, file_list in os.walk(folder_path):
            for file in file_list:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, os.path.dirname(folder_path))
                zipf.write(file_path, arcname)
    
    file_size = os.path.getsize(zip_path) / 1e6
    print(f"üì¶ Created: {zip_path} ({file_size:.2f} MB)")
    
    try:
        files.download(zip_path)
        print(f"‚úÖ Downloaded: {zip_name}.zip")
    except Exception as e:
        print(f"‚ö†Ô∏è Download failed: {e}")
        print(f"   File saved locally at: {zip_path}")

print("Creating ZIP files for download...\n")

# Download trained models (most important)
create_and_download_zip('trained_models', 'cacao_trained_models')

# Download training results
create_and_download_zip('cacao_training/cacao_segmentation_v1', 'cacao_training_results')

# Download test predictions
create_and_download_zip('test_predictions', 'cacao_test_predictions')

# Download validation runs
create_and_download_zip('runs/segment', 'cacao_validation_runs')

print("\n‚úÖ All ZIP files created and downloaded!")

In [2]:
# Automatically download trained models to your computer
import os
from pathlib import Path

# Check if running in Google Colab
try:
    from google.colab import files
    IN_COLAB = True
    print("‚úÖ Running in Google Colab - Will auto-download models")
except ImportError:
    IN_COLAB = False
    print("üíª Not in Colab - Models saved locally")

# List of models to download
models_to_download = [
    'trained_models/cacao_segmentation_best.pt',
    'trained_models/cacao_segmentation_best.onnx',
    'trained_models/cacao_segmentation_best.torchscript',
]

if IN_COLAB:
    print("\nüì• Downloading models to your computer...")
    for model_path in models_to_download:
        if os.path.exists(model_path):
            file_size = os.path.getsize(model_path) / 1e6
            print(f"\n‚¨áÔ∏è Downloading: {Path(model_path).name} ({file_size:.2f} MB)")
            try:
                files.download(model_path)
                print(f"   ‚úÖ Downloaded successfully!")
            except Exception as e:
                print(f"   ‚ö†Ô∏è Download failed: {e}")
        else:
            print(f"\n‚ö†Ô∏è Model not found: {model_path}")

    print("\n" + "="*60)
    print("üéâ All models downloaded to your Downloads folder!")
    print("="*60)
else:
    print("\nüìÇ Models saved locally at:")
    for model_path in models_to_download:
        if os.path.exists(model_path):
            file_size = os.path.getsize(model_path) / 1e6
            print(f"  ‚úÖ {model_path} ({file_size:.2f} MB)")
        else:
            print(f"  ‚ùå {model_path} (not found)")

    print("\nüí° To use in your mobile app:")
    print("   1. Copy the .onnx file to: mobile-app/assets/models/")
    print("   2. Copy the .pt file to: public/models/")

‚úÖ Running in Google Colab - Will auto-download models

üì• Downloading models to your computer...

‚¨áÔ∏è Downloading: cacao_segmentation_best.pt (6.79 MB)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded successfully!

‚ö†Ô∏è Model not found: trained_models/cacao_segmentation_best.onnx

‚ö†Ô∏è Model not found: trained_models/cacao_segmentation_best.torchscript

üéâ All models downloaded to your Downloads folder!


In [3]:
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
!mkdir -p trained_models

## 1. Setup Environment and Install Dependencies

In [None]:
# Check GPU availability
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

In [None]:
# Install required packages
!pip install ultralytics roboflow opencv-python-headless matplotlib pandas seaborn -q

In [None]:
# Import libraries
from ultralytics import YOLO
import os
from pathlib import Path
import yaml
from roboflow import Roboflow
import shutil

## 2. Download Dataset from Roboflow

In [None]:
# Initialize Roboflow with your API key
# Get your API key from: https://app.roboflow.com/settings/api
ROBOFLOW_API_KEY = "BmOmRgtqhSUKBitTttWj"  # Replace with your actual API key

rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = rf.workspace("cariesdetectionproject").project("cacao-uf6rm")

# Download dataset in YOLOv8 segmentation format
# Using the latest version (v5 as shown in the screenshot)
dataset = project.version(5).download("yolov8", location="./cacao_dataset")

In [None]:
# Verify dataset structure
dataset_path = Path("./cacao_dataset")
print(f"Dataset downloaded to: {dataset_path}")
print(f"\nDataset structure:")
for item in dataset_path.rglob("*"):
    if item.is_dir():
        print(f"  üìÅ {item.relative_to(dataset_path)}/")
        # Count files in each directory
        files = list(item.glob("*"))
        if files:
            print(f"     ({len(files)} files)")

In [None]:
# Check the data.yaml file
yaml_path = dataset_path / "data.yaml"
with open(yaml_path, 'r') as f:
    data_config = yaml.safe_load(f)

print("Dataset configuration:")
print(f"  Train images: {data_config.get('train', 'N/A')}")
print(f"  Val images: {data_config.get('val', 'N/A')}")
print(f"  Test images: {data_config.get('test', 'N/A')}")
print(f"  Number of classes: {data_config.get('nc', 'N/A')}")
print(f"  Class names: {data_config.get('names', 'N/A')}")

## 3. Configure Training Parameters

In [None]:
# Training configuration
CONFIG = {
    'model_size': 'yolov8n-seg',  # nano model for edge deployment (options: n, s, m, l, x)
    'epochs': 100,
    'batch_size': 16,  # Adjust based on GPU memory
    'imgsz': 640,  # Image size
    'device': 0 if torch.cuda.is_available() else 'cpu',
    'patience': 20,  # Early stopping patience
    'save_period': 10,  # Save checkpoint every N epochs
    'workers': 8,
    'optimizer': 'AdamW',
    'lr0': 0.001,  # Initial learning rate
    'weight_decay': 0.0005,
    'mosaic': 1.0,  # Mosaic augmentation probability
    'mixup': 0.0,  # Mixup augmentation probability
    'hsv_h': 0.015,  # HSV-Hue augmentation
    'hsv_s': 0.7,  # HSV-Saturation augmentation
    'hsv_v': 0.4,  # HSV-Value augmentation
    'degrees': 10.0,  # Rotation augmentation
    'translate': 0.1,  # Translation augmentation
    'scale': 0.5,  # Scale augmentation
    'shear': 0.0,  # Shear augmentation
    'flipud': 0.0,  # Vertical flip probability
    'fliplr': 0.5,  # Horizontal flip probability
}

print("Training Configuration:")
for key, value in CONFIG.items():
    print(f"  {key}: {value}")

## 4. Initialize YOLOv8 Segmentation Model

In [None]:
# Load pretrained YOLOv8 segmentation model
model = YOLO(f"{CONFIG['model_size']}.pt")

print(f"Model loaded: {CONFIG['model_size']}")
print(f"Model summary:")
model.info()

## 5. Train the Model

In [None]:
# Train the model
results = model.train(
    data=str(yaml_path),
    epochs=CONFIG['epochs'],
    batch=CONFIG['batch_size'],
    imgsz=CONFIG['imgsz'],
    device=CONFIG['device'],
    patience=CONFIG['patience'],
    save_period=CONFIG['save_period'],
    workers=CONFIG['workers'],
    optimizer=CONFIG['optimizer'],
    lr0=CONFIG['lr0'],
    weight_decay=CONFIG['weight_decay'],
    mosaic=CONFIG['mosaic'],
    mixup=CONFIG['mixup'],
    hsv_h=CONFIG['hsv_h'],
    hsv_s=CONFIG['hsv_s'],
    hsv_v=CONFIG['hsv_v'],
    degrees=CONFIG['degrees'],
    translate=CONFIG['translate'],
    scale=CONFIG['scale'],
    shear=CONFIG['shear'],
    flipud=CONFIG['flipud'],
    fliplr=CONFIG['fliplr'],
    project='cacao_training',
    name='cacao_segmentation_v1',
    exist_ok=True,
    pretrained=True,
    verbose=True,
    plots=True
)

print("\n‚úÖ Training completed!")

## 6. Evaluate Model Performance

In [None]:
# Validate the trained model
metrics = model.val()

print("\nüìä Validation Metrics:")
print(f"  Box mAP50: {metrics.box.map50:.4f}")
print(f"  Box mAP50-95: {metrics.box.map:.4f}")
print(f"  Mask mAP50: {metrics.seg.map50:.4f}")
print(f"  Mask mAP50-95: {metrics.seg.map:.4f}")

## 7. Visualize Training Results

In [None]:
# Display training curves
from IPython.display import Image, display
import matplotlib.pyplot as plt

results_dir = Path('cacao_training/cacao_segmentation_v1')

# Show results plot
results_img = results_dir / 'results.png'
if results_img.exists():
    print("\nüìà Training Results:")
    display(Image(filename=str(results_img)))
else:
    print("Results plot not found")

In [None]:
# Show confusion matrix
confusion_matrix_img = results_dir / 'confusion_matrix.png'
if confusion_matrix_img.exists():
    print("\nüî¢ Confusion Matrix:")
    display(Image(filename=str(confusion_matrix_img)))
else:
    print("Confusion matrix not found")

In [None]:
# Show sample predictions
val_batch_pred = results_dir / 'val_batch0_pred.jpg'
if val_batch_pred.exists():
    print("\nüéØ Sample Predictions:")
    display(Image(filename=str(val_batch_pred)))
else:
    print("Validation predictions not found")

## 8. Test on Sample Images

In [None]:
# Load best model
best_model_path = results_dir / 'weights' / 'best.pt'
best_model = YOLO(str(best_model_path))

print(f"‚úÖ Best model loaded from: {best_model_path}")

In [None]:
# Test on validation images
val_images_dir = dataset_path / 'valid' / 'images'
test_images = list(val_images_dir.glob('*.jpg'))[:5]  # Test on first 5 images

print(f"Testing on {len(test_images)} images...\n")

for img_path in test_images:
    print(f"Processing: {img_path.name}")

    # Run inference
    results = best_model.predict(
        source=str(img_path),
        conf=0.25,
        iou=0.7,
        show=False,
        save=True,
        project='test_predictions',
        name='segmentation',
        exist_ok=True
    )

    # Display results
    result = results[0]
    if result.masks is not None:
        print(f"  ‚úÖ Detected {len(result.masks)} pod(s)")
    else:
        print(f"  ‚ö†Ô∏è No pods detected")

print("\n‚úÖ Predictions saved to: test_predictions/segmentation/")

In [None]:
# Display test predictions
pred_dir = Path('test_predictions/segmentation')
pred_images = list(pred_dir.glob('*.jpg'))

if pred_images:
    print("\nüé® Test Predictions:")
    for pred_img in pred_images[:3]:  # Show first 3
        print(f"\n{pred_img.name}:")
        display(Image(filename=str(pred_img)))
else:
    print("No prediction images found")

## 9. Export Model for Production

In [None]:
# Export to ONNX for edge deployment
onnx_path = best_model.export(format='onnx', imgsz=CONFIG['imgsz'])
print(f"‚úÖ Model exported to ONNX: {onnx_path}")

In [None]:
# Export to TorchScript for mobile deployment
torchscript_path = best_model.export(format='torchscript', imgsz=CONFIG['imgsz'])
print(f"‚úÖ Model exported to TorchScript: {torchscript_path}")

## 10. Download Trained Model

In [None]:
# Copy best model to easy access location
output_dir = Path('trained_models')
output_dir.mkdir(exist_ok=True)

# Copy PyTorch model
shutil.copy(best_model_path, output_dir / 'cacao_segmentation_best.pt')

# Copy ONNX model if exists
if Path(onnx_path).exists():
    shutil.copy(onnx_path, output_dir / 'cacao_segmentation_best.onnx')

# Copy TorchScript model if exists
if Path(torchscript_path).exists():
    shutil.copy(torchscript_path, output_dir / 'cacao_segmentation_best.torchscript')

print(f"\n‚úÖ Models saved to: {output_dir.absolute()}")
print("\nDownload these files to use in your application:")
for model_file in output_dir.glob('*'):
    print(f"  üì¶ {model_file.name} ({model_file.stat().st_size / 1e6:.2f} MB)")

In [None]:
# Create model info file
model_info = {
    'model_name': 'Cacao Pod Segmentation YOLOv8',
    'model_size': CONFIG['model_size'],
    'dataset': 'Roboflow Cacao Dataset v5',
    'total_images': data_config.get('nc', 'N/A'),
    'classes': data_config.get('names', []),
    'training_epochs': CONFIG['epochs'],
    'image_size': CONFIG['imgsz'],
    'box_map50': float(metrics.box.map50),
    'box_map50_95': float(metrics.box.map),
    'mask_map50': float(metrics.seg.map50),
    'mask_map50_95': float(metrics.seg.map),
}

import json
with open(output_dir / 'model_info.json', 'w') as f:
    json.dump(model_info, f, indent=2)

print("\nüìã Model information saved to: trained_models/model_info.json")

## üéâ Training Complete!

Your cacao pod segmentation model is now trained and ready for deployment.

**Next Steps:**
1. Download the models from the `trained_models` directory
2. Integrate the model into your mobile app
3. Train the MobileNetV3 SimCLR model for yield estimation (see next notebook)

**Model Performance Summary:**
- Box mAP50-95: Check the metrics above
- Mask mAP50-95: Check the metrics above
- Ready for edge deployment with ONNX and TorchScript exports

In [None]:

# === PATCHED: Extract and store YOLOv8 validation metrics (no retraining) ===
from ultralytics import YOLO

# Use the correct path from training configuration
model = YOLO("cacao_training/cacao_segmentation_v1/weights/best.pt")
metrics = model.val()

# Use mean precision and recall (mp and mr) which are already scalar values
precision = float(metrics.box.mp)  # mean precision across all classes
recall = float(metrics.box.mr)     # mean recall across all classes
map50 = float(metrics.box.map50)   # mAP at IoU=0.5
map50_95 = float(metrics.box.map)  # mAP at IoU=0.5:0.95

print("Box Metrics:")
print(f"  Precision: {precision:.4f}")
print(f"  Recall: {recall:.4f}")
print(f"  mAP@0.5: {map50:.4f}")
print(f"  mAP@0.5:0.95: {map50_95:.4f}")

print("\nMask Metrics:")
print(f"  Mask mAP@0.5: {metrics.seg.map50:.4f}")
print(f"  Mask mAP@0.5:0.95: {metrics.seg.map:.4f}")


In [None]:

# === PATCHED: Save segmentation metrics for plotting ===
import json
with open("segmentation_metrics.json", "w") as f:
    json.dump({
        "precision": precision,
        "recall": recall,
        "map50": map50,
        "map50_95": map50_95,
        "mask_map50": float(metrics.seg.map50),
        "mask_map50_95": float(metrics.seg.map)
    }, f, indent=2)
print("Saved segmentation_metrics.json")


In [None]:

# === PATCHED: Qualitative segmentation visualization helper ===
import matplotlib.pyplot as plt
import cv2

def show_segmentation(image_path, masks):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    overlay = img.copy()
    for mask in masks:
        overlay[mask > 0] = [255, 0, 0]
    plt.figure()
    plt.imshow(overlay)
    plt.axis("off")
    plt.title("YOLOv8 Instance Segmentation Output")
    plt.show()
