In [2]:
import os
import cv2
import numpy as np
from PIL import Image
import argparse

def _parse_yolo_bbox(parts, img_width, img_height):
    """Turn one tokenised YOLO label row into a pixel-space crop window.

    Args:
        parts: Whitespace-split fields of a label line. The first five are
            read as ``class x_center y_center width height``; the four box
            values are multiplied by the image size to get pixels.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        tuple: ``(class_id, x1, y1, x2, y2)``. The box is clamped to the
        image and ``x2`` / ``y2`` are *exclusive* ends, ready for slicing.

    Raises:
        ValueError: If a field is not numeric (or is NaN).
        OverflowError: If a box value is infinite.
    """
    class_id = int(parts[0])
    x_center = float(parts[1]) * img_width
    y_center = float(parts[2]) * img_height
    width = float(parts[3]) * img_width
    height = float(parts[4]) * img_height

    x1 = max(0, int(x_center - width / 2))
    y1 = max(0, int(y_center - height / 2))
    # Slice ends are exclusive, so the largest legal end is the image size
    # itself; clamping to size - 1 would drop the last pixel column/row of
    # every box that touches the right or bottom edge.
    x2 = min(img_width, int(x_center + width / 2))
    y2 = min(img_height, int(y_center + height / 2))
    return class_id, x1, y1, x2, y2


def extract_yolo_bboxes(images_dir, labels_dir, output_dir, class_name, target_size=(224, 224),
                       image_ext='.jpg', save_with_class=True, verbose=True):
    """Crop every labelled bounding box out of a set of images and save it.

    For each image in ``images_dir`` that has a same-named ``.txt`` file in
    ``labels_dir``, every label row is converted to a pixel box, cropped,
    resized to ``target_size`` and written as a ``.jpg``.

    Args:
        images_dir: Directory containing the source images.
        labels_dir: Directory containing one ``<image stem>.txt`` per image.
        output_dir: Directory the crops are written to (created if missing).
        class_name: Indexable collection mapping a class id to its name.
        target_size: Size passed to ``cv2.resize`` for every crop.
        image_ext: Extension used to select image files (matched
            case-insensitively).
        save_with_class: If true, crops go to ``output_dir/<class>/``;
            otherwise they all go to ``output_dir`` with the class name
            appended to the file name.
        verbose: If true (default), print one ``Save:`` line per crop.
            Warnings and the final summary are always printed.

    Returns:
        None. Progress and a final summary are printed.

    Notes:
        Rows with fewer than five fields are skipped silently. Rows with
        non-numeric fields, an unknown class id, or an empty box after
        clamping are skipped with a message instead of aborting the run.
        Images without a label file, or that ``cv2.imread`` cannot load,
        are skipped as well.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Case-insensitive match so '.JPG' files are not silently ignored;
    # sorted so the processing order (and the log) is reproducible.
    wanted_ext = image_ext.lower()
    image_files = sorted(f for f in os.listdir(images_dir) if f.lower().endswith(wanted_ext))

    total_crops = 0

    for image_file in image_files:
        stem = os.path.splitext(image_file)[0]

        # Images without a matching label file have nothing to crop.
        label_path = os.path.join(labels_dir, stem + '.txt')
        if not os.path.exists(label_path):
            continue

        image_path = os.path.join(images_dir, image_file)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Can not read image: {image_path}")
            continue

        img_height, img_width = image.shape[:2]

        with open(label_path, 'r') as f:
            lines = f.readlines()

        for i, line in enumerate(lines):
            parts = line.split()
            if len(parts) < 5:
                continue

            try:
                class_id, x1, y1, x2, y2 = _parse_yolo_bbox(parts, img_width, img_height)
            except (ValueError, OverflowError):
                # One bad row should not abort the whole extraction.
                print(f"Malformed label: {image_file} line {i+1}")
                continue

            # Resolve the class name up front. A negative id would silently
            # wrap around to the end of a list, so reject it explicitly.
            label = None
            if class_id >= 0:
                try:
                    label = class_name[class_id]
                except (IndexError, KeyError):
                    label = None
            if label is None:
                print(f"Unknown class id {class_id}: {image_file} line {i+1}")
                continue

            if x2 <= x1 or y2 <= y1:
                print(f"Invalid bounding box: {image_file} the {i+1} th box")
                continue

            cropped = image[y1:y2, x1:x2]
            resized = cv2.resize(cropped, target_size, interpolation=cv2.INTER_AREA)

            if save_with_class:
                # One sub-directory per class.
                class_dir = os.path.join(output_dir, label)
                os.makedirs(class_dir, exist_ok=True)
                output_path = os.path.join(class_dir, f"{stem}_bbox{i}.jpg")
            else:
                # Flat layout: the class name becomes part of the file name.
                output_path = os.path.join(output_dir, f"{stem}_bbox{i}_{label}.jpg")

            # cv2.imwrite reports failure through its return value rather
            # than an exception, so only count crops that were written.
            if not cv2.imwrite(output_path, resized):
                print(f"Failed to write: {output_path}")
                continue
            total_crops += 1

            if verbose:
                print(f"Save: {output_path}")

    print(f"\nFinial! A total of {total_crops} bounding box images were extracted")
    print(f"Output directory: {output_dir}")

In [3]:
# Class names in class-id order: entry i names the output folder for class id i.
# NOTE(review): the saved run output shows some 'ants-*' files landing in
# 'Bees' - confirm this order matches the dataset's own class index.
PEST_CLASS_NAMES = [
    "Ants", "Bees", "Beetles", "Caterpillars", "Earthworms", "Earwigs",
    "Grasshoppers", "Moths", "Slugs", "Snails", "Wasps", "Weevils",
]

# Crop the labelled boxes of every dataset split into Recognized/<split>/<class>/.
for dataset in ('train', 'test', 'valid'):
    split_root = f"archive/{dataset}"
    extract_yolo_bboxes(
        f"{split_root}/images",
        f"{split_root}/labels",
        f"Recognized/{dataset}",
        PEST_CLASS_NAMES,
        target_size=(200, 200),
        save_with_class=True,
    )

Save: Recognized/train\Bees\ants-1-_jpg.rf.8227b4d5f1cbbd72c290c1ca6012a337_bbox0.jpg
Save: Recognized/train\Bees\ants-1-_jpg.rf.8227b4d5f1cbbd72c290c1ca6012a337_bbox1.jpg
Save: Recognized/train\Bees\ants-1-_jpg.rf.cb38695892dc2ca4d3ee97d3f8a06ba7_bbox0.jpg
Save: Recognized/train\Bees\ants-1-_jpg.rf.cb38695892dc2ca4d3ee97d3f8a06ba7_bbox1.jpg
Save: Recognized/train\Bees\ants-1-_jpg.rf.ef0ce7e104418cd6d0e0f57c45de1f35_bbox0.jpg
Save: Recognized/train\Bees\ants-1-_jpg.rf.ef0ce7e104418cd6d0e0f57c45de1f35_bbox1.jpg
Save: Recognized/train\Ants\ants-1-_png.rf.0b5a6cf9f49f735808979cd685d8ce4f_bbox0.jpg
Save: Recognized/train\Ants\ants-1-_png.rf.809de1265776e6576f1633d8656dda40_bbox0.jpg
Save: Recognized/train\Ants\ants-1-_png.rf.e4e6047152ea7ab2d012b00013d1029e_bbox0.jpg
Save: Recognized/train\Ants\ants-100-_jpg.rf.7d5b8b923da794ecaaa1a84d9b8dd789_bbox0.jpg
Save: Recognized/train\Ants\ants-100-_jpg.rf.ca94e096859f9330f96813c0c020330c_bbox0.jpg
Save: Recognized/train\Ants\ants-100-_jpg.rf.fba81