Set up YOLOv7: clone the repository and install requirements

In [None]:
!git clone https://github.com/WongKinYiu/yolov7
%cd yolov7
!pip install -r requirements.txt


fatal: destination path 'yolov7' already exists and is not an empty directory.
/content/yolov7


Import Bee Dataset

In [None]:
!pip install kagglehub
import kagglehub

# Download latest version
path = kagglehub.dataset_download("ashfaqsyed/bees-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/ashfaqsyed/bees-dataset?dataset_version_number=1...


100%|██████████| 1.92G/1.92G [00:12<00:00, 161MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/ashfaqsyed/bees-dataset/versions/1


Verify and Copy to a Visible Directory

In [None]:
import os
import shutil

# Location of the downloaded dataset inside the kagglehub cache
# (the path printed by the download cell).
dataset_path = "/root/.cache/kagglehub/datasets/ashfaqsyed/bees-dataset/versions/1"
print("Files in dataset directory:", os.listdir(dataset_path))

# Copy from the hidden cache to a working directory under /content.
source_dir = dataset_path
destination_dir = "/content/bees-dataset"

if os.path.isdir(destination_dir):
    # copytree raises FileExistsError when the target exists. Skipping keeps the
    # cell re-runnable and avoids re-populating ML-Data after later cells have
    # already moved its files into the train/val layout.
    print("Dataset already present at:", destination_dir)
else:
    shutil.copytree(source_dir, destination_dir)
    print("Dataset copied to:", destination_dir)

Files in dataset directory: ['ML-Data']
Dataset copied to: /content/bees-dataset


Find All Classes

In [None]:
import os
import glob
import xml.etree.ElementTree as ET

def list_classes(xml_dir):
    """Collect the distinct object class names used by the XML annotations.

    Every ``*.xml`` file directly inside ``xml_dir`` is parsed, and the text of
    the ``<name>`` child of each ``<object>`` element is recorded.

    Args:
        xml_dir: Directory containing the annotation XML files.

    Returns:
        Alphabetically sorted list of unique class names. Sorting makes the
        result stable across runs. The list is empty when no annotations
        are found.
    """
    classes = set()
    for xml_file in glob.glob(os.path.join(xml_dir, '*.xml')):
        root = ET.parse(xml_file).getroot()
        for obj in root.iter('object'):
            # findtext returns None for a missing <name> and '' for an empty one;
            # neither is a usable class label, so skip instead of crashing.
            cls = obj.findtext('name')
            if cls:
                classes.add(cls)
    return sorted(classes)

# Scan the copied dataset for the class names that actually occur in its
# annotations, so the class list used for conversion can be checked against it.

# Path to your dataset annotations
xml_dir = "/content/bees-dataset/ML-Data"  # Update this path if necessary

# List all unique class types
class_list = list_classes(xml_dir)
print("Unique class types found:", class_list)

Unique class types found: ['bee', 'pollen']


Convert Annotations

In [None]:
import os
import glob
import xml.etree.ElementTree as ET

# Define the classes
# The position of a name in this list is the numeric class id written to each
# label line (convert_annotation uses classes.index), so the order here must
# match the `names` entry written to data.yaml.
classes = ['pollen', "bee"]  # Update this list with all your classes

def convert(size, box):
    """Turn a corner-style box into a normalised centre/size box.

    Args:
        size: Image dimensions as ``(width, height)``.
        box: Box edges ordered ``(xmin, xmax, ymin, ymax)``.

    Returns:
        Tuple ``(x, y, w, h)``: the box centre and the box width/height, with
        x and w scaled by 1/width and y and h scaled by 1/height.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]

    # The centre is shifted by one pixel before scaling.
    # NOTE(review): presumably maps 1-based pixel coordinates to 0-based - confirm.
    centre_x = (box[0] + box[1]) / 2.0 - 1
    centre_y = (box[2] + box[3]) / 2.0 - 1
    span_x = box[1] - box[0]
    span_y = box[3] - box[2]

    return (centre_x * scale_x, centre_y * scale_y, span_x * scale_x, span_y * scale_y)

def convert_annotation(xml_file, output_dir):
    """Write the label file for one XML annotation file.

    The output is named after the XML file (``<stem>.txt``) and placed in
    ``output_dir``. It holds one line per kept object: the class id followed by
    the four values returned by ``convert``. The file is created even when no
    object is kept.

    Objects are skipped when their class name is not in the module-level
    ``classes`` list (this includes a missing ``<name>``) or when they are
    flagged ``<difficult>1</difficult>``. A missing or empty ``<difficult>`` tag
    is treated as "not difficult" instead of raising ``AttributeError``.

    Args:
        xml_file: Path to the annotation XML file.
        output_dir: Existing directory that receives the ``.txt`` label file.
    """
    root = ET.parse(xml_file).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    label_name = os.path.splitext(os.path.basename(xml_file))[0] + '.txt'
    with open(os.path.join(output_dir, label_name), 'w') as out_file:
        for obj in root.iter('object'):
            # <difficult> may be absent or empty; default to "0" in both cases.
            difficult = obj.findtext('difficult', default='0').strip() or '0'
            cls = obj.findtext('name')
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the edges ordered (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(f"{cls_id} " + " ".join([str(a) for a in bb]) + '\n')

# Convert every XML annotation found directly in input_dir into a label file
# in output_dir (one .txt per .xml, written by convert_annotation).

# Path to your dataset
input_dir = "/content/bees-dataset/ML-Data"  # Update this path if necessary
output_dir = "/content/bees-dataset/labels"

# The label directory is created on first run only.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

for xml_file in glob.glob(os.path.join(input_dir, '*.xml')):
    convert_annotation(xml_file, output_dir)

print("Annotations converted to YOLO format and saved in:", output_dir)

Annotations converted to YOLO format and saved in: /content/bees-dataset/labels


Arrange Data for Training

In [None]:
import os
import shutil
import random

def organize_dataset(base_dir, train_fraction=0.8, seed=None):
    """Split the dataset into train/val folders for images and labels.

    Expected input layout under ``base_dir``:
        ML-Data/   images (.jpg/.jpeg/.png, any letter case) and .xml annotations
        labels/    one .txt label file per annotated image

    Resulting layout:
        images/train, images/val   images moved out of ML-Data
        labels/train, labels/val   label files moved next to their image's split
        spare/                     the .xml files from ML-Data

    Label files with no matching image stay in ``labels/``.

    Args:
        base_dir: Dataset root directory.
        train_fraction: Share of images assigned to the training split
            (default 0.8, i.e. an 80-20 split).
        seed: Optional seed for a reproducible split. With ``None`` the
            module-level ``random`` state is used, as before.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError("train_fraction must be between 0 and 1")

    image_exts = ('.jpg', '.jpeg', '.png')

    # Define paths
    ml_data_dir = os.path.join(base_dir, 'ML-Data')
    labels_dir = os.path.join(base_dir, 'labels')
    images_dir = os.path.join(base_dir, 'images')
    spare_dir = os.path.join(base_dir, 'spare')

    # Create the target layout; exist_ok keeps the function re-runnable.
    os.makedirs(spare_dir, exist_ok=True)
    for parent in (images_dir, labels_dir):
        for split in ('train', 'val'):
            os.makedirs(os.path.join(parent, split), exist_ok=True)

    # os.listdir order is arbitrary; sort so a fixed seed gives a fixed split.
    all_files = sorted(os.listdir(ml_data_dir))
    image_files = [f for f in all_files if f.lower().endswith(image_exts)]

    # Shuffle, then cut at the requested fraction.
    if seed is None:
        random.shuffle(image_files)
    else:
        random.Random(seed).shuffle(image_files)
    split_idx = int(train_fraction * len(image_files))
    train_images = image_files[:split_idx]
    val_images = image_files[split_idx:]

    # Move images to train and val folders
    for split, names in (('train', train_images), ('val', val_images)):
        for img_file in names:
            shutil.move(os.path.join(ml_data_dir, img_file),
                        os.path.join(images_dir, split, img_file))

    # Match labels to images by file stem; sets give constant-time lookups.
    train_stems = {os.path.splitext(f)[0] for f in train_images}
    val_stems = {os.path.splitext(f)[0] for f in val_images}
    for ann_file in os.listdir(labels_dir):
        src = os.path.join(labels_dir, ann_file)
        if not os.path.isfile(src):
            continue  # skip the train/ and val/ subdirectories themselves
        stem = os.path.splitext(ann_file)[0]
        if stem in train_stems:
            shutil.move(src, os.path.join(labels_dir, 'train', ann_file))
        elif stem in val_stems:
            shutil.move(src, os.path.join(labels_dir, 'val', ann_file))

    # Move the XML files to the spare folder
    for xml_file in (f for f in all_files if f.endswith('.xml')):
        shutil.move(os.path.join(ml_data_dir, xml_file), os.path.join(spare_dir, xml_file))

    print("Dataset organized successfully!")

# Path to your dataset directory
# (the ML-Data and labels folders produced by the earlier cells live under it)
base_dir = "/content/bees-dataset"

# Organize the dataset
# Files are moved, not copied: afterwards ML-Data no longer holds the images
# or the XML annotations.
organize_dataset(base_dir)

Dataset organized successfully!


Configure Yolo data yaml

In [None]:
# Dataset description passed to the training and testing commands via --data / data=.
# The `names` order must stay identical to the `classes` list used when the
# annotations were converted, because label files store the list index as class id.
data_yaml = """
train: /content/bees-dataset/images/train
val: /content/bees-dataset/images/val
nc: 2  # number of classes
names: ['pollen', 'bee']
"""

# Overwrites any existing data.yaml in the dataset root.
with open('/content/bees-dataset/data.yaml', 'w') as file:
    file.write(data_yaml)

print("data.yaml file created successfully!")

data.yaml file created successfully!


Train Model Yolov7

In [None]:
# download COCO starting checkpoint
%cd /content/yolov7
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt

/content/yolov7
--2025-01-27 07:07:58--  https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/13e046d1-f7f0-43ab-910b-480613181b1f?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250127%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250127T070758Z&X-Amz-Expires=300&X-Amz-Signature=ab50cdfbbd176847d1cd4b7b4ba08870911d75463997894aae2e49354bf0fb9a&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dyolov7_training.pt&response-content-type=application%2Foctet-stream [following]
--2025-01-27 07:07:58--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/13e046d1-f7f0-43ab-910b-480613181b1f?X-Amz-Algorithm=AWS4-HM

In [None]:
# run this cell to begin training
# Starts from the downloaded yolov7_training.pt checkpoint and trains for 55 epochs
# with batch size 16 on GPU 0, using the dataset described in data.yaml.
# No --name is given; the later cells read the results from runs/train/exp.
# NOTE(review): train.py presumably picks a new run directory on repeated runs, in
# which case the hard-coded runs/train/exp paths further down go stale - confirm.
%cd /content/yolov7
!python train.py --batch 16 --epochs 55 --data /content/bees-dataset/data.yaml --weights 'yolov7_training.pt' --device 0

/content/yolov7
2025-01-27 07:08:11.732138: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-27 07:08:11.750469: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-27 07:08:11.772454: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-27 07:08:11.779135: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-27 07:08:11.794981: I tensorflow/core/platform/

Test and Confirm

In [None]:
# Run the testing script
# test.py and the weights path are relative, so this relies on the working
# directory still being /content/yolov7 from the training cell.
# The logged arguments show task='val', i.e. the run evaluates best.pt on the
# validation split listed in data.yaml.
!python test.py --data /content/bees-dataset/data.yaml --img 640 --batch 16 --conf 0.001 --iou 0.65 --device 0 --weights runs/train/exp/weights/best.pt --name yolov7_test

Namespace(weights=['runs/train/exp/weights/best.pt'], data='/content/bees-dataset/data.yaml', batch_size=16, img_size=640, conf_thres=0.001, iou_thres=0.65, task='val', device='0', single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project='runs/test', name='yolov7_test', exist_ok=False, no_trace=False, v5_metric=False)
YOLOR 🚀 v0.1-128-ga207844 torch 2.5.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40513.8125MB)

  ckpt = torch.load(w, map_location=map_location)  # load
Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Model Summary: 314 layers, 36487166 parameters, 6194944 gradients, 103.2 GFLOPS
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

  cache, exists = torch.load(cache_path), True  # load
[34m[1mval: [0mScanning '/content/bees-dataset/labels/val.cac

Download Trained Weights

In [None]:
# google.colab is only available inside a Colab runtime.
from google.colab import files

# Download the best weights
# (the path assumes the training run wrote to runs/train/exp)
files.download('/content/yolov7/runs/train/exp/weights/best.pt')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Download Performance Graphs!

In [None]:
# Zip the runs directory containing training logs
# The archive covers the whole exp folder, so besides the plots it also contains
# every checkpoint under weights/ (see the file listing in the cell output).
!zip -r /content/yolov7/runs/train/exp.zip /content/yolov7/runs/train/exp

# Download the zipped logs
# `files` comes from the `from google.colab import files` import in the previous cell.
files.download('/content/yolov7/runs/train/exp.zip')

updating: content/yolov7/runs/train/exp/ (stored 0%)
updating: content/yolov7/runs/train/exp/train_batch1.jpg (deflated 3%)
updating: content/yolov7/runs/train/exp/confusion_matrix.png (deflated 38%)
updating: content/yolov7/runs/train/exp/test_batch0_pred.jpg (deflated 4%)
updating: content/yolov7/runs/train/exp/opt.yaml (deflated 47%)
updating: content/yolov7/runs/train/exp/weights/ (stored 0%)
updating: content/yolov7/runs/train/exp/weights/epoch_049.pt (deflated 7%)
updating: content/yolov7/runs/train/exp/weights/epoch_054.pt (deflated 7%)
updating: content/yolov7/runs/train/exp/weights/epoch_051.pt (deflated 7%)
updating: content/yolov7/runs/train/exp/weights/epoch_024.pt (deflated 7%)
updating: content/yolov7/runs/train/exp/weights/epoch_053.pt (deflated 7%)
updating: content/yolov7/runs/train/exp/weights/epoch_000.pt (deflated 7%)
updating: content/yolov7/runs/train/exp/weights/epoch_052.pt (deflated 7%)
updating: content/yolov7/runs/train/exp/weights/last.pt (deflated 8%)
updat

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Copy the zipped logs to Google Drive
# `shutil` is not imported in this cell; it relies on the import from an earlier cell.
shutil.copy('/content/yolov7/runs/train/exp.zip', '/content/drive/My Drive/exp.zip')

Mounted at /content/drive


'/content/drive/My Drive/exp.zip'

In [None]:
import os
# Create the Bee-Training folder in Google Drive
# (requires Drive to be mounted at /content/drive already; exist_ok makes this re-runnable)
os.makedirs('/content/drive/My Drive/Bee-Training', exist_ok=True)

# Copy the zipped logs to the Bee-Training folder in Google Drive
# This is a second copy: the previous cell already placed exp.zip in the Drive root.
# `shutil` relies on the import from an earlier cell.
shutil.copy('/content/yolov7/runs/train/exp.zip', '/content/drive/My Drive/Bee-Training/exp.zip')

# Check if the file has been copied successfully
file_exists = os.path.isfile('/content/drive/My Drive/Bee-Training/exp.zip')
print(f"File copied successfully: {file_exists}")

File copied successfully: True


Train Model Yolov8

In [None]:
!git clone https://github.com/ultralytics/ultralytics
%cd ultralytics

Cloning into 'ultralytics'...
remote: Enumerating objects: 50876, done.[K
remote: Counting objects: 100% (948/948), done.[K
remote: Compressing objects: 100% (382/382), done.[K
remote: Total 50876 (delta 844), reused 569 (delta 566), pack-reused 49928 (from 4)[K
Receiving objects: 100% (50876/50876), 29.20 MiB | 30.76 MiB/s, done.
Resolving deltas: 100% (37559/37559), done.
/content/ultralytics


In [None]:
!pip install -e


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

-e option requires 1 argument


In [None]:
# Train the yolov8n model on the bee dataset for 50 epochs at image size 640,
# batch size 16, under the run name yolov8n-bees.
# Needs the `yolo` command on PATH, i.e. the ultralytics install cell must have succeeded.
!yolo task=detect mode=train model=yolov8n.pt data=/content/bees-dataset/data.yaml epochs=50 imgsz=640 batch=16 name=yolov8n-bees

/bin/bash: line 1: yolo: command not found
