In [None]:
# ==========================================================
# 1. Clone the Fruits Detection Dataset (YOLO format)
# ==========================================================
# Fetch the dataset repository, then make it the notebook's working directory.
# The relative paths used further down ("data.yaml", "valid/images/...") are
# resolved against this directory, so the %cd must run before them.
# NOTE(review): re-running this cell after a successful clone will make
# `git clone` fail and `%cd` descend relative to the already-changed directory.
!git clone https://github.com/lightly-ai/dataset_fruits_detection.git
%cd dataset_fruits_detection

# List the repository contents as a quick sanity check of the clone
!ls

# ==========================================================
# 2. Install Ultralytics YOLOv8
# ==========================================================
# Install the ultralytics package (provides the YOLO class imported below);
# --quiet keeps pip's output short
!pip install ultralytics --quiet

import pandas as pd
import numpy as np
from ultralytics import YOLO # Import the YOLO model
import os # Import os module for interacting with the operating system
import matplotlib.pyplot as plt # Import matplotlib for plotting
import cv2 # Import opencv for image processing

# ==========================================================
# 3. Load Pretrained YOLOv8 Model
# ==========================================================
# Start from the pretrained "small" YOLOv8 checkpoint rather than random
# weights, so fine-tuning on the fruits data reuses already-learned features.
pretrained_weights = "yolov8s.pt"
model = YOLO(pretrained_weights)

# ==========================================================
# 4. Train the Model on Fruits Dataset
# ==========================================================
# Fine-tuning settings, gathered in one place so they are easy to tweak:
#   data       - dataset configuration file (train/val image paths, class names)
#   epochs     - number of full passes over the training set
#   imgsz      - image size used for training
#   batch      - number of images processed per training step
#   pretrained - start from the pretrained weights loaded above
train_settings = {
    "data": "data.yaml",
    "epochs": 20,
    "imgsz": 640,
    "batch": 16,
    "pretrained": True,
}

# Run the training loop and keep whatever the trainer returns for inspection
results = model.train(**train_settings)

# ==========================================================
# 5. Evaluate the Model on Validation Set
# ==========================================================
# Score the trained model on the validation data, keep the result in
# `metrics` for later use, and print it (e.g., mAP - mean Average Precision)
# in the same step.
print(metrics := model.val())

# ==========================================================
# 6. Run Inference on a Sample Image
# ==========================================================
# Path to one image from the validation split (relative to the dataset root)
sample_image = "valid/images/0_0_640.jpg"
# Run inference; 'save=True' also writes the annotated image to disk
preds = model.predict(sample_image, save=True)

# Render the detections onto the image. Re-reading the file with cv2.imread
# would only show the untouched input, so use the first (and only) result's
# plot(), which returns the image with boxes and labels drawn, in BGR order.
img = preds[0].plot()
# Convert from BGR (OpenCV convention) to RGB (what Matplotlib expects)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Display the annotated image without axis ticks
plt.imshow(img_rgb)
plt.axis("off")
plt.show()

# ==========================================================
# 7. Run Inference on Multiple Images (Optional)
# ==========================================================
# Directory holding the validation images (relative to the dataset root)
image_dir = "valid/images"
# Only files with these extensions are treated as images; anything else in
# the directory (hidden files, caches, ...) is skipped.
image_extensions = (".jpg", ".jpeg", ".png", ".bmp")

# os.listdir returns entries in arbitrary order, so sort before slicing to
# make "the first 5 images" reproducible between runs and machines.
test_images = sorted(
    name
    for name in os.listdir(image_dir)
    if name.lower().endswith(image_extensions)
)[:5]

for img_file in test_images:
    # Full path to the current image
    img_path = os.path.join(image_dir, img_file)
    # Run inference; 'save=True' also writes the annotated image to disk
    preds = model.predict(img_path, save=True)
    # Take the annotated image (boxes and labels drawn, BGR order) from the
    # result instead of re-reading the raw file, which has no predictions.
    img = preds[0].plot()
    # Convert from BGR (OpenCV convention) to RGB (what Matplotlib expects)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Show the annotated image, titled with its filename, without axis ticks
    plt.imshow(img_rgb)
    plt.title(f"Prediction: {img_file}")
    plt.axis("off")
    plt.show()