In [None]:
# Download Pascal VOC 2007 dataset
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar

# Extract the dataset
!tar -xf VOCtrainval_06-Nov-2007.tar


--2025-03-05 10:31:07--  http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
Resolving host.robots.ox.ac.uk (host.robots.ox.ac.uk)... 129.67.94.152
Connecting to host.robots.ox.ac.uk (host.robots.ox.ac.uk)|129.67.94.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 460032000 (439M) [application/x-tar]
Saving to: ‘VOCtrainval_06-Nov-2007.tar’


2025-03-05 10:31:40 (13.4 MB/s) - ‘VOCtrainval_06-Nov-2007.tar’ saved [460032000/460032000]



In [None]:
# List the extracted VOC2007 directory to check which sub-folders the archive produced
!ls VOCdevkit/VOC2007/


Annotations  ImageSets	JPEGImages  SegmentationClass  SegmentationObject


In [None]:
import os
import glob

# Define the dataset path
VOC_PATH = "/content/VOCdevkit/VOC2007"

# glob.glob() returns files in arbitrary (filesystem) order, so both lists are
# sorted by path. With one .xml per .jpg sharing the same file stem, index i of
# image_paths then refers to the same sample as index i of annotation_paths,
# which the cells that pair the two lists by position depend on.
image_paths = sorted(glob.glob(os.path.join(VOC_PATH, "JPEGImages", "*.jpg")))
annotation_paths = sorted(glob.glob(os.path.join(VOC_PATH, "Annotations", "*.xml")))

# Print the number of images and annotations
print(f"Total Images: {len(image_paths)}")
print(f"Total Annotations: {len(annotation_paths)}")


Total Images: 5011
Total Annotations: 5011


In [None]:
import xml.etree.ElementTree as ET

def parse_annotation(xml_file):
    """
    Parse an annotation file to extract bounding box and class information.

    Args:
        xml_file: Path (or open file object) of a Pascal VOC style XML
            annotation.

    Returns:
        A list with one dict per <object> element found directly under the
        root, each of the form
        {"class": <text of <name>>, "bbox": (xmin, ymin, xmax, ymax)}
        with integer coordinates.

    Raises:
        xml.etree.ElementTree.ParseError: if the input is not well-formed XML.
    """
    root = ET.parse(xml_file).getroot()

    def _coord(box, tag):
        # Go through float() so coordinates written as "48.0" or "48.7" parse
        # too (int("48.0") raises ValueError). Plain integer text yields the
        # same value as int() did; fractional values are rounded to nearest.
        return int(round(float(box.find(tag).text)))

    objects = []
    for obj in root.findall("object"):
        box = obj.find("bndbox")
        objects.append({
            "class": obj.find("name").text,
            "bbox": (
                _coord(box, "xmin"),
                _coord(box, "ymin"),
                _coord(box, "xmax"),
                _coord(box, "ymax"),
            ),
        })

    return objects

# Smoke test: parse the first annotation file in the list and show the result
first_annotation_path = annotation_paths[0]
sample_annotation = parse_annotation(first_annotation_path)
print(sample_annotation)


[{'class': 'car', 'bbox': (1, 57, 499, 355)}, {'class': 'car', 'bbox': (405, 216, 500, 375)}, {'class': 'person', 'bbox': (338, 8, 373, 131)}, {'class': 'person', 'bbox': (404, 7, 445, 132)}, {'class': 'person', 'bbox': (373, 99, 415, 155)}, {'class': 'person', 'bbox': (419, 108, 474, 167)}, {'class': 'person', 'bbox': (377, 22, 411, 105)}, {'class': 'person', 'bbox': (444, 29, 488, 126)}, {'class': 'person', 'bbox': (286, 18, 320, 65)}, {'class': 'person', 'bbox': (203, 28, 229, 56)}, {'class': 'person', 'bbox': (179, 23, 207, 57)}, {'class': 'person', 'bbox': (143, 27, 174, 56)}, {'class': 'person', 'bbox': (118, 15, 136, 58)}, {'class': 'person', 'bbox': (67, 27, 100, 67)}, {'class': 'person', 'bbox': (29, 33, 61, 91)}, {'class': 'person', 'bbox': (85, 31, 116, 61)}, {'class': 'person', 'bbox': (316, 19, 341, 71)}, {'class': 'person', 'bbox': (367, 19, 393, 104)}]


In [None]:
import cv2

# Pascal VOC class names. A class's position in this list is the numeric class
# ID written into the YOLO label lines, so the order must not change.
VOC_CLASSES = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]

def convert_to_yolo(image_path, annotation_data):
    """
    Convert VOC annotation to YOLO format.

    Args:
        image_path: Path of the image the annotations belong to. The image is
            read only to obtain the pixel width and height used to normalise
            the boxes, so it must be the image the boxes were drawn on.
        annotation_data: List of {"class": str, "bbox": (xmin, ymin, xmax, ymax)}
            dicts, as returned by parse_annotation().

    Returns:
        A list of strings "<class_id> <x_center> <y_center> <width> <height>".
        class_id is the index of the class in VOC_CLASSES; the four box values
        are divided by the image width/height and printed with six decimals.
        Objects whose class is not in VOC_CLASSES are skipped.

    Raises:
        OSError: if the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread() reports a missing or undecodable file by returning None
        # rather than raising; fail here with a message that names the file.
        raise OSError(f"Could not read image: {image_path}")
    h, w = img.shape[:2]  # Rows (height) and columns (width) of the image

    yolo_labels = []
    for obj in annotation_data:
        class_name = obj["class"]

        if class_name not in VOC_CLASSES:
            continue  # Skip unknown classes

        class_id = VOC_CLASSES.index(class_name)  # Get class index

        # Corner box -> centre/size box, each normalised by the image size
        x_min, y_min, x_max, y_max = obj["bbox"]
        x_center = ((x_min + x_max) / 2) / w
        y_center = ((y_min + y_max) / 2) / h
        bbox_width = (x_max - x_min) / w
        bbox_height = (y_max - y_min) / h

        yolo_labels.append(f"{class_id} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}")

    return yolo_labels

# Test conversion on the annotation parsed from annotation_paths[0].
# The image is looked up by that annotation's own file stem instead of taking
# image_paths[0]: the two glob lists are not guaranteed to share an order, and
# normalising boxes by a different image's size yields values above 1.
sample_stem = os.path.splitext(os.path.basename(annotation_paths[0]))[0]
image_path = os.path.join(VOC_PATH, "JPEGImages", f"{sample_stem}.jpg")
yolo_labels = convert_to_yolo(image_path, sample_annotation)

# Print YOLO formatted labels
print("\n".join(yolo_labels))


6 0.670241 0.412000 1.335121 0.596000
6 1.213137 0.591000 0.254692 0.318000
14 0.953083 0.139000 0.093834 0.246000
14 1.138070 0.139000 0.109920 0.250000
14 1.056300 0.254000 0.112601 0.112000
14 1.197051 0.275000 0.147453 0.118000
14 1.056300 0.127000 0.091153 0.166000
14 1.249330 0.155000 0.117962 0.194000
14 0.812332 0.083000 0.091153 0.094000
14 0.579088 0.084000 0.069705 0.056000
14 0.517426 0.080000 0.075067 0.068000
14 0.424933 0.083000 0.083110 0.058000
14 0.340483 0.073000 0.048257 0.086000
14 0.223861 0.094000 0.088472 0.080000
14 0.120643 0.124000 0.085791 0.116000
14 0.269437 0.092000 0.083110 0.060000
14 0.880697 0.090000 0.067024 0.104000
14 1.018767 0.123000 0.069705 0.170000


In [None]:
import os

# Directory (relative to the current working directory) that receives one
# YOLO-format .txt label file per image; it is created if it does not exist.
# NOTE(review): darknet is documented to locate label files from each image's
# path -- confirm it will actually find labels stored in this folder.
yolo_labels_dir = "yolo_labels"
os.makedirs(yolo_labels_dir, exist_ok=True)

def save_yolo_labels(image_path, annotation_path):
    """
    Write the YOLO label file for one image/annotation pair.

    The annotation is parsed and converted to YOLO lines, which are stored as
    "<image file stem>.txt" inside yolo_labels_dir (lines joined by newlines,
    without a trailing newline). Returns the path of the written file.
    """
    label_lines = convert_to_yolo(image_path, parse_annotation(annotation_path))

    # The label file shares the image's base name, with a .txt extension
    stem, _ext = os.path.splitext(os.path.basename(image_path))
    label_path = os.path.join(yolo_labels_dir, f"{stem}.txt")

    with open(label_path, "w") as label_file:
        label_file.write("\n".join(label_lines))

    return label_path

# Convert and save labels for all images
for img_path, ann_path in zip(image_paths, annotation_paths):
    save_yolo_labels(img_path, ann_path)

# Verify by listing saved labels
!ls yolo_labels


000005.txt  001263.txt	002501.txt  003727.txt	005004.txt  006229.txt	007461.txt  008752.txt
000007.txt  001265.txt	002502.txt  003729.txt	005006.txt  006230.txt	007465.txt  008753.txt
000009.txt  001266.txt	002504.txt  003732.txt	005007.txt  006233.txt	007466.txt  008755.txt
000012.txt  001268.txt	002505.txt  003735.txt	005014.txt  006234.txt	007467.txt  008756.txt
000016.txt  001269.txt	002508.txt  003740.txt	005016.txt  006235.txt	007468.txt  008757.txt
000017.txt  001270.txt	002512.txt  003743.txt	005018.txt  006236.txt	007470.txt  008759.txt
000019.txt  001272.txt	002513.txt  003748.txt	005020.txt  006238.txt	007474.txt  008760.txt
000020.txt  001273.txt	002514.txt  003749.txt	005023.txt  006240.txt	007475.txt  008764.txt
000021.txt  001274.txt	002518.txt  003750.txt	005024.txt  006241.txt	007477.txt  008766.txt
000023.txt  001277.txt	002519.txt  003751.txt	005026.txt  006243.txt	007479.txt  008768.txt
000024.txt  001279.txt	002520.txt  003752.txt	005027.txt  006247.txt	007480.txt 

In [None]:
import os

# Folders holding the source images and the generated YOLO label files
image_dir = "VOCdevkit/VOC2007/JPEGImages"
label_dir = "yolo_labels"

# Read both directory listings and order each one by file name
image_filenames = os.listdir(image_dir)
image_filenames.sort()
label_filenames = os.listdir(label_dir)
label_filenames.sort()

# Show the first five names from each listing as a quick sanity check
print("First 5 image files:", image_filenames[:5])
print("First 5 label files:", label_filenames[:5])


First 5 image files: ['000005.jpg', '000007.jpg', '000009.jpg', '000012.jpg', '000016.jpg']
First 5 label files: ['000005.txt', '000007.txt', '000009.txt', '000012.txt', '000016.txt']


In [None]:
# Rebuild the sorted image file list straight from the JPEGImages folder
# (the same listing the previous cell produced via image_dir).
image_filenames = sorted(os.listdir("VOCdevkit/VOC2007/JPEGImages"))


In [None]:
# Check if every image has a matching label file.
# Labels are looked up by name rather than by comparing the two sorted lists
# position by position: with a positional zip, a single missing file shifts
# every later pair out of alignment and the count stops meaning anything.
available_labels = set(label_filenames)
matched_pairs = []
images_without_label = []
for img in image_filenames:
    lbl = img.replace(".jpg", ".txt")
    if lbl in available_labels:
        matched_pairs.append((img, lbl))
    else:
        images_without_label.append(img)

print(f"Total matched pairs: {len(matched_pairs)}")
if images_without_label:
    # Only printed when something is wrong, so the normal output is unchanged
    print(f"Images without a label file: {len(images_without_label)} (e.g. {images_without_label[:5]})")


Total matched pairs: 5011


In [None]:
import random

# Shuffle the dataset for randomness.
# A sorted copy is shuffled instead of image_filenames itself. Shuffling the
# shared list in place made the split depend on how often this cell had been
# run, because every run started from the previous run's order. Starting from
# the sorted order with the same seed gives the same split on every run.
random.seed(42)
shuffled_filenames = sorted(image_filenames)
random.shuffle(shuffled_filenames)

# Define split sizes
total_images = len(shuffled_filenames)
train_split = int(0.8 * total_images)
val_split = int(0.9 * total_images)  # 80% train, 10% val, 10% test

# Split into train, val, test
train_images = shuffled_filenames[:train_split]
val_images = shuffled_filenames[train_split:val_split]
test_images = shuffled_filenames[val_split:]

# Print dataset sizes
print(f"Total Images: {total_images}")
print(f"Train: {len(train_images)}, Validation: {len(val_images)}, Test: {len(test_images)}")


Total Images: 5011
Train: 4008, Validation: 501, Test: 502


In [None]:
# Save file paths in text files, one image path per line.
# Paths are written in absolute form: image_dir is relative to the current
# directory, and the notebook later changes into the darknet directory before
# training, where the relative form would no longer resolve.
for list_name, split_images in (
    ("train.txt", train_images),
    ("val.txt", val_images),
    ("test.txt", test_images),
):
    with open(list_name, "w") as f:
        for img in split_images:
            f.write(os.path.abspath(os.path.join(image_dir, img)) + "\n")

print("Train, Validation, and Test file lists saved!")


Train, Validation, and Test file lists saved!


In [None]:

# darknet "data" file describing the dataset.
# The list and names files are referenced by absolute path (resolved against
# the current directory, which is where this notebook writes them) so the
# entries stay valid when darknet is started from a different directory.
# backup/ is left relative: the darknet build creates ./backup in its own folder.
yolo_config = (
    "classes=20\n"
    f"train={os.path.abspath('train.txt')}\n"
    f"valid={os.path.abspath('val.txt')}\n"
    f"names={os.path.abspath('voc.names')}\n"
    "backup=backup/\n"
)
with open("voc.data", "w") as f:
    f.write(yolo_config)

print("voc.data file created!")


voc.data file created!


In [None]:
# One class name per line, in class-ID order. The text is built from
# VOC_CLASSES (the list the label conversion takes its IDs from) so the names
# file cannot drift out of sync with the IDs stored in the label files.
voc_classes = "\n".join(VOC_CLASSES)

with open("voc.names", "w") as f:
    f.write(voc_classes)

print("voc.names file created!")


voc.names file created!


In [None]:
# Fetch darknet19_448.conv.23 into the current directory; the first training
# command below passes it as the initial weights file.
# NOTE(review): that command is run from the darknet directory with a bare
# relative file name -- confirm the file is where darknet will look for it.
!wget https://pjreddie.com/media/files/darknet19_448.conv.23


--2025-03-05 10:45:21--  https://pjreddie.com/media/files/darknet19_448.conv.23
Resolving pjreddie.com (pjreddie.com)... 162.0.215.52
Connecting to pjreddie.com (pjreddie.com)|162.0.215.52|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 79327120 (76M) [application/octet-stream]
Saving to: ‘darknet19_448.conv.23’


2025-03-05 10:45:35 (5.96 MB/s) - ‘darknet19_448.conv.23’ saved [79327120/79327120]



In [None]:
# Fetch the yolov1.cfg network config into the current directory.
# NOTE(review): the training command below refers to cfg/yolov1.cfg relative
# to the darknet directory and its recorded output is "Couldn't open file:
# cfg/yolov1.cfg" -- this download probably needs to be copied or referenced there.
!wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov1.cfg


--2025-03-05 10:48:21--  https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov1.cfg
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2740 (2.7K) [text/plain]
Saving to: ‘yolov1.cfg’


2025-03-05 10:48:21 (49.4 MB/s) - ‘yolov1.cfg’ saved [2740/2740]



In [None]:
!git clone https://github.com/AlexeyAB/darknet.git
%cd darknet
!make


Cloning into 'darknet'...
remote: Enumerating objects: 15873, done.[K
remote: Counting objects: 100% (23/23), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 15873 (delta 12), reused 7 (delta 7), pack-reused 15850 (from 3)[K
Receiving objects: 100% (15873/15873), 14.50 MiB | 12.33 MiB/s, done.
Resolving deltas: 100% (10679/10679), done.
/content/darknet/darknet
mkdir -p ./obj/
mkdir -p backup
mkdir -p results
chmod +x *.sh
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic -Ofast -c ./src/image_opencv.cpp -o obj/image_opencv.o
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic -Ofast -c ./src/http_stream.cpp -o obj/http_stream.o
[01m[K./src/http_stream.cpp:[m[K In member function ‘[01m[Kbool JSON_sender::write(const char*)[m[K’:
  253 |                 int [01;35m[Kn[m[K = _wr

In [None]:
# Run the freshly built binary without arguments; it should print its usage line
!./darknet


usage: ./darknet <function>


In [None]:
with open("voc.data", "w") as f:
    f.write("classes=20\n")
    f.write("train=train.txt\n")
    f.write("valid=val.txt\n")
    f.write("names=voc.names\n")
    f.write("backup=backup/\n")

# Verify the file exists
!ls -l voc.data


-rw-r--r-- 1 root root 72 Mar  5 11:02 voc.data


In [None]:
!./darknet detector train voc.data cfg/yolov1.cfg darknet19_448.conv.23 -gpus 0


 GPU isn't used 
 OpenCV isn't used - data augmentation will be slow 
0
yolov1
Couldn't open file: cfg/yolov1.cfg


In [None]:

# List the checkout's cfg/ folder to see which config files it actually contains
!ls -l cfg/


total 1656
-rw-r--r-- 1 root root  94180 Mar  5 10:58 9k.labels
-rw-r--r-- 1 root root  99137 Mar  5 10:58 9k.names
-rw-r--r-- 1 root root 149465 Mar  5 10:58 9k.tree
-rw-r--r-- 1 root root    879 Mar  5 10:58 alexnet.cfg
-rw-r--r-- 1 root root  12277 Mar  5 10:58 cd53paspp-gamma.cfg
-rw-r--r-- 1 root root   1250 Mar  5 10:58 cifar.cfg
-rw-r--r-- 1 root root   1174 Mar  5 10:58 cifar.test.cfg
-rw-r--r-- 1 root root    387 Mar  5 10:58 coco9k.map
-rw-r--r-- 1 root root    183 Mar  5 10:58 coco.data
-rw-r--r-- 1 root root    625 Mar  5 10:58 coco.names
-rw-r--r-- 1 root root    273 Mar  5 10:58 combine9k.data
-rw-r--r-- 1 root root    535 Mar  5 10:58 crnn.train.cfg
-rw-r--r-- 1 root root   7723 Mar  5 10:58 csdarknet53-omega.cfg
-rw-r--r-- 1 root root  28083 Mar  5 10:58 cspx-p7-mish.cfg
-rw-r--r-- 1 root root  30733 Mar  5 10:58 cspx-p7-mish_hp.cfg
-rw-r--r-- 1 root root  14987 Mar  5 10:58 cspx-p7-mish-omega.cfg
-rw-r--r-- 1 root root  10625 Mar  5 10:58 csresnext50-panet-spp.cfg
-rw-

In [None]:
i !./darknet detector train voc.data cfg/yolov1.cfg yolov1.weights -gpus 0


NameError: name 'inputs' is not defined

In [None]:
!cd ..
!ls -l


In [None]:
# Print the notebook's current working directory
!pwd


In [None]:
# Switch the notebook's working directory to the nested darknet checkout
# (the path the clone cell's recorded output reported).
%cd /content/darknet/darknet


In [None]:
# List the contents of the current working directory
!ls -l


In [None]:
# Run the build again in the current working directory
!make



In [None]:
# Launch training with yolov1.weights as the starting weights.
# NOTE(review): no visible cell downloads yolov1.weights or places yolov1.cfg
# under cfg/ -- confirm both files exist before running.
!./darknet detector train voc.data cfg/yolov1.cfg yolov1.weights -gpus 0


In [None]:
# cd and ls share one shell invocation here, so ls lists /content/darknet;
# the notebook's own working directory is not changed by this line.
!cd /content/darknet && ls -l


In [None]:
cd /content/darknet


In [None]:
!rm -rf /content/darknet
!git clone https://github.com/AlexeyAB/darknet.git
!cd darknet
