## Mapping Old Class IDs to New Class IDs

| New Class ID | New Class Name | Old Class IDs Grouped |
| ------------ | -------------- | --------------------- |
| 0            | three\_wheels  | 2, 6, 13              |
| 1            | bus            | 4, 7, 10              |
| 2            | car            | 0, 5, 12, 9           |
| 3            | tractor        | 14                    |
| 4            | truck          | 8, 16, 15             |
| 5            | ambulance      | 1                     |
| 6            | van            | 17                    |
| 7            | motorbikes     | 3, 11                 |


## Reduce the classes of the dataset


In [6]:
from pathlib import Path

# Original dataset: one folder per vehicle type, each holding its own labels/ folder.
# The paths are joined from components instead of being spelled with "\" so this
# cell also runs on systems whose path separator is "/".
dataset_dir = Path("data") / "Nepali_vehicle_dataset"
output_dir = Path("data") / "Nepali_vehicle_dataset_reduced"
output_dir.mkdir(parents=True, exist_ok=True)

# Class ID remap: old fine-grained ID -> new grouped ID
remap = {
    2: 0, 6: 0, 13: 0,        # three_wheels
    4: 1, 7: 1, 10: 1,        # bus
    0: 2, 5: 2, 12: 2, 9: 2,  # car
    14: 3,                   # tractor
    8: 4, 16: 4, 15: 4,       # truck
    1: 5,                    # ambulance
    17: 6,                   # van
    3: 7, 11: 7              # motorbikes
}

# Debugging info
print("Starting class remapping...")

# Traverse all folders like SUV/labels, bus/labels, etc.
for class_folder in dataset_dir.iterdir():
    if not class_folder.is_dir():
        continue

    label_dir = class_folder / "labels"
    if not label_dir.exists():
        continue

    for label_file in label_dir.glob("*.txt"):
        with open(label_file, "r") as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            parts = line.strip().split()
            if not parts:
                # blank line: nothing to remap
                continue
            try:
                old_class_id = int(parts[0])
                if old_class_id not in remap:
                    print(f"Skipping unknown ID {old_class_id} in {label_file}")
                    continue
                new_class_id = remap[old_class_id]
                # Only the class ID changes; the remaining fields are kept as read
                parts[0] = str(new_class_id)
                new_lines.append(" ".join(parts))
            except ValueError:
                # first field was not an integer class ID
                print(f"Invalid line in {label_file}: {line}")

        # Only write if there's valid data; a label file with no usable lines
        # gets no counterpart in the output tree.
        if new_lines:
            # Mirror the <vehicle type>/labels/<file> layout under output_dir
            relative_path = label_file.relative_to(dataset_dir)
            new_label_path = output_dir / relative_path
            new_label_path.parent.mkdir(parents=True, exist_ok=True)
            with open(new_label_path, "w") as f:
                f.write("\n".join(new_lines))
            print(f"Written: {new_label_path}")

print("Remapping complete.")


Starting class remapping...
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_1.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_10.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_100.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_101.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_102.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_103.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_104.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_105.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_106.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_107.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_108.txt
Written: data\Nepali_vehicle_dataset_reduced\ambulance\labels\ambulance_109.txt
Written: data\N

## Function: Copy All Files From One Folder to Another

In [7]:
import shutil
from pathlib import Path

def copy_files(src_folder: Path, dst_folder: Path):
    """Copy every regular file directly inside ``src_folder`` into ``dst_folder``.

    Sub-directories of ``src_folder`` are not descended into and are not copied.
    ``dst_folder`` (including missing parents) is created when needed.

    Args:
        src_folder: Folder to copy from (``Path`` or path string).
        dst_folder: Folder to copy into (``Path`` or path string).

    Raises:
        FileNotFoundError: If ``src_folder`` is not an existing directory, so a
            mistyped path is not reported as a successful copy of zero files.
    """
    src_folder = Path(src_folder)
    dst_folder = Path(dst_folder)

    if not src_folder.is_dir():
        raise FileNotFoundError(f"Source folder not found: {src_folder}")

    # Create destination folder if it doesn't exist
    dst_folder.mkdir(parents=True, exist_ok=True)

    # iterdir() instead of glob("*.*"): that pattern only matches names that
    # contain a dot, so extension-less files were silently left behind.
    for file in src_folder.iterdir():
        if file.is_file():
            shutil.copy(file, dst_folder / file.name)
    print(f"Copied files from {src_folder} to {dst_folder} successfully.")

## Copy Missing Images

In [4]:
# 1. Ambulance - the remapped dataset only received label files, so bring the images over
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\ambulance\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\ambulance\images"),
)

Copied files from data\Nepali_vehicle_dataset\ambulance\images to data\Nepali_vehicle_dataset_reduced\ambulance\images successfully.


In [8]:
# 2. Auto Rickshaw - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\auto_rickshaw\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\auto_rickshaw\images"),
)

Copied files from data\Nepali_vehicle_dataset\auto_rickshaw\images to data\Nepali_vehicle_dataset_reduced\auto_rickshaw\images successfully.


In [9]:
# 3. Bikes - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\bikes\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\bikes\images"),
)

Copied files from data\Nepali_vehicle_dataset\bikes\images to data\Nepali_vehicle_dataset_reduced\bikes\images successfully.


In [10]:
# 4. Bus - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\bus\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\bus\images"),
)

Copied files from data\Nepali_vehicle_dataset\bus\images to data\Nepali_vehicle_dataset_reduced\bus\images successfully.


In [11]:
# 5. Car - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\car\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\car\images"),
)

Copied files from data\Nepali_vehicle_dataset\car\images to data\Nepali_vehicle_dataset_reduced\car\images successfully.


In [12]:
# 6. E Rickshaw - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\e-rickshaw\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\e-rickshaw\images"),
)

Copied files from data\Nepali_vehicle_dataset\e-rickshaw\images to data\Nepali_vehicle_dataset_reduced\e-rickshaw\images successfully.


In [13]:
# 7. Microbus - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\micro_bus\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\micro_bus\images"),
)

Copied files from data\Nepali_vehicle_dataset\micro_bus\images to data\Nepali_vehicle_dataset_reduced\micro_bus\images successfully.


In [14]:
# 8. Mini Truck - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\mini_truck\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\mini_truck\images"),
)

Copied files from data\Nepali_vehicle_dataset\mini_truck\images to data\Nepali_vehicle_dataset_reduced\mini_truck\images successfully.


In [15]:
# 9. Police Vehicle - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\police_vehicle\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\police_vehicle\images"),
)

Copied files from data\Nepali_vehicle_dataset\police_vehicle\images to data\Nepali_vehicle_dataset_reduced\police_vehicle\images successfully.


In [16]:
# 10. School Bus - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\school_bus\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\school_bus\images"),
)

Copied files from data\Nepali_vehicle_dataset\school_bus\images to data\Nepali_vehicle_dataset_reduced\school_bus\images successfully.


In [17]:
# 11. Scooter - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\scooter\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\scooter\images"),
)

Copied files from data\Nepali_vehicle_dataset\scooter\images to data\Nepali_vehicle_dataset_reduced\scooter\images successfully.


In [18]:
# 12. SUV - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\SUV\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\SUV\images"),
)

Copied files from data\Nepali_vehicle_dataset\SUV\images to data\Nepali_vehicle_dataset_reduced\SUV\images successfully.


In [19]:
# 13. Taxi - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\taxi\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\taxi\images"),
)

Copied files from data\Nepali_vehicle_dataset\taxi\images to data\Nepali_vehicle_dataset_reduced\taxi\images successfully.


In [20]:
# 14. Tempo - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\tempo\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\tempo\images"),
)

Copied files from data\Nepali_vehicle_dataset\tempo\images to data\Nepali_vehicle_dataset_reduced\tempo\images successfully.


In [21]:
# 15. Tractor - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\tractor\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\tractor\images"),
)

Copied files from data\Nepali_vehicle_dataset\tractor\images to data\Nepali_vehicle_dataset_reduced\tractor\images successfully.


In [22]:
# 16. Transport Vehicle - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\transport_vehicle\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\transport_vehicle\images"),
)

Copied files from data\Nepali_vehicle_dataset\transport_vehicle\images to data\Nepali_vehicle_dataset_reduced\transport_vehicle\images successfully.


In [23]:
# 17. Truck - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\truck\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\truck\images"),
)

Copied files from data\Nepali_vehicle_dataset\truck\images to data\Nepali_vehicle_dataset_reduced\truck\images successfully.


In [24]:
# 18. Van - copy the images next to the remapped labels
copy_files(
    src_folder=Path(r"data\Nepali_vehicle_dataset\van\images"),
    dst_folder=Path(r"data\Nepali_vehicle_dataset_reduced\van\images"),
)

Copied files from data\Nepali_vehicle_dataset\van\images to data\Nepali_vehicle_dataset_reduced\van\images successfully.


## Function: Convert Label Format From YOLO to COCO

In [2]:
import os
import json
from PIL import Image

def convert_yolo_to_coco(dataset_path, category_names):
    """
    Convert YOLO-format annotations (txt) into a COCO-format JSON file.

    Reads images from ``<dataset_path>/images`` and label files from
    ``<dataset_path>/labels`` and writes ``<dataset_path>/_annotations.coco.json``.
    Expects YOLO labels already remapped to 0..N-1, where N is the number of
    entries in ``category_names``.

    Args:
        dataset_path: Folder containing the ``images`` and ``labels`` sub-folders.
        category_names: Category names; the position of a name is its category ID.

    Notes:
        * Files are processed in sorted name order so image and annotation IDs
          are the same on every run.
        * Images without a matching ``.txt`` label file are left out.
        * Label lines that do not have 5 fields, contain non-numeric values, or
          use a class ID outside ``category_names`` are skipped; the number of
          skipped lines is printed at the end.
    """
    images_dir = os.path.join(dataset_path, "images")
    labels_dir = os.path.join(dataset_path, "labels")
    category_names = list(category_names)
    num_categories = len(category_names)

    coco_output = {
        "info": {
            "description": "Nepali Vehicle Dataset",
            "version": "1.0",
            "year": 2025,
            "contributor": "Custom Conversion",
            "date_created": "2025-08-19"
        },
        "licenses": [
            {
                "id": 1,
                "name": "Unknown",
                "url": "http://example.com"
            }
        ],
        "images": [],
        "annotations": [],
        # Categories are 0-based so they line up with the YOLO class IDs
        "categories": [
            {"id": i, "name": name, "supercategory": "vehicle"}
            for i, name in enumerate(category_names)
        ]
    }

    annotation_id = 1
    image_id = 1
    skipped_lines = 0

    # os.listdir() returns entries in arbitrary order; sort for reproducible IDs
    for image_filename in sorted(os.listdir(images_dir)):
        if not image_filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        image_path = os.path.join(images_dir, image_filename)
        label_path = os.path.join(labels_dir, os.path.splitext(image_filename)[0] + ".txt")

        if not os.path.exists(label_path):
            continue

        with Image.open(image_path) as img:
            width, height = img.size

        # Add image entry
        coco_output["images"].append({
            "id": image_id,
            "file_name": image_filename,
            "width": width,
            "height": height
        })

        # Read YOLO label file
        with open(label_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if not parts:
                    continue
                if len(parts) != 5:
                    skipped_lines += 1
                    continue
                try:
                    # int(float(...)) keeps accepting IDs written as "2.0"
                    class_id = int(float(parts[0]))
                    x_center, y_center, bbox_width, bbox_height = map(float, parts[1:])
                except (ValueError, OverflowError):
                    skipped_lines += 1
                    continue
                if not 0 <= class_id < num_categories:
                    # would otherwise reference a category that does not exist
                    skipped_lines += 1
                    continue

                # Convert YOLO (normalised centre + size) -> COCO (pixel top-left + size)
                x = (x_center - bbox_width / 2) * width
                y = (y_center - bbox_height / 2) * height
                w = bbox_width * width
                h = bbox_height * height

                coco_output["annotations"].append({
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": class_id,
                    "bbox": [x, y, w, h],
                    "area": w * h,
                    "iscrowd": 0
                })
                annotation_id += 1

        image_id += 1

    # Save
    output_json_path = os.path.join(dataset_path, "_annotations.coco.json")
    with open(output_json_path, "w") as f:
        json.dump(coco_output, f, indent=4)

    if skipped_lines:
        print(f"Skipped {skipped_lines} label line(s) that were malformed or used an unknown class ID.")
    print(f"YOLO → COCO conversion complete! Saved to {output_json_path}")


## Function to count how many classes are there in a YOLO dataset

In [3]:
import os
from collections import Counter

def count_yolo_classes(labels_dir):
    """Tally the class IDs found in every ``.txt`` file under ``labels_dir``.

    The folder is walked recursively. The first whitespace-separated token of
    each non-blank line is read as an integer class ID; lines whose first
    token is not an integer are reported and left out of the tally.

    Returns:
        collections.Counter mapping class ID -> number of label lines.
    """
    tally = Counter()
    for dirpath, _subdirs, filenames in os.walk(labels_dir):
        for filename in filenames:
            if not filename.endswith(".txt"):
                continue
            path = os.path.join(dirpath, filename)
            with open(path, "r") as handle:
                for raw in handle:
                    text = raw.strip()
                    if not text:
                        continue
                    try:
                        tally[int(raw.split()[0])] += 1
                    except ValueError:
                        print(f"⚠️ Skipping malformed line in {path}: {text}")
    return tally

## Function: Split a COCO Dataset into Train, Valid, Test (70/20/10)

In [4]:
from pycocotools.coco import COCO
import json, os, shutil
import numpy as np

def split_coco_dataset(dataset_dir, ann_filename="_annotations.coco.json", 
                       splits=(0.7, 0.2, 0.1), 
                       output_dirs=("train", "valid", "test"),
                       seed=None):
    """
    Split a COCO dataset into train/val/test subsets and save images + annotation files.

    Each subset folder ``<dataset_dir>/<output_dir>`` receives a copy of its
    images plus an annotation JSON named ``ann_filename``.

    Args:
        dataset_dir (str): Root folder containing 'images' and the annotation JSON file.
        ann_filename (str): Name of the annotation JSON file (default "_annotations.coco.json").
            It is looked up in ``dataset_dir`` first and, for backward
            compatibility, in ``dataset_dir/images`` second.
        splits (tuple): Fractions for train, val, test (should sum <= 1.0). Only the
            first two are used for sizing; the test subset receives every
            remaining image.
        output_dirs (tuple): Names of subfolders for train/val/test splits.
        seed (int | None): Seed for the shuffle. ``None`` (default) shuffles with
            NumPy's global random state, as before; an integer makes the split
            reproducible.

    Raises:
        ValueError: If ``splits`` has fewer than two entries, a negative entry,
            or sums to more than 1.0.
        FileNotFoundError: If the annotation file is in neither location.

    Note:
        Existing subset folders are reused, not emptied, so images copied by an
        earlier run with a different shuffle stay on disk.
    """
    if len(splits) < 2 or min(splits) < 0 or sum(splits) > 1.0 + 1e-9:
        raise ValueError(f"splits must be non-negative fractions summing to at most 1.0, got {splits}")

    # convert_yolo_to_coco writes the JSON into the dataset root; the images/
    # sub-folder is still accepted because that is where this function used to look.
    root_ann = os.path.join(dataset_dir, ann_filename)
    legacy_ann = os.path.join(dataset_dir, "images", ann_filename)
    if os.path.isfile(root_ann):
        ann_file = root_ann
    elif os.path.isfile(legacy_ann):
        ann_file = legacy_ann
    else:
        raise FileNotFoundError(f"Annotation file not found: tried {root_ann} and {legacy_ann}")

    # Load original annotation file (only "info" and "licenses" are taken from it)
    with open(ann_file, "r") as f:
        ann_data = json.load(f)

    # COCO helper
    coco = COCO(ann_file)
    # Sort first so that a given seed always produces the same permutation
    image_ids = sorted(coco.getImgIds())
    if seed is None:
        np.random.shuffle(image_ids)
    else:
        np.random.default_rng(seed).shuffle(image_ids)

    n_total = len(image_ids)
    n_train = int(splits[0] * n_total)
    n_val = int(splits[1] * n_total)

    train_ids = image_ids[:n_train]
    val_ids = image_ids[n_train:n_train + n_val]
    test_ids = image_ids[n_train + n_val:]

    def save_subset(ids, save_dir, save_json):
        """Write the subset's annotation JSON and copy its images into save_dir."""
        os.makedirs(save_dir, exist_ok=True)
        
        images = [coco.loadImgs(i)[0] for i in ids]
        # extend() instead of sum(lists, []): same order, no repeated list copies
        anns = []
        for i in ids:
            anns.extend(coco.loadAnns(coco.getAnnIds(imgIds=i)))
        
        subset = {
            "info": ann_data.get("info", {}),
            "licenses": ann_data.get("licenses", []),
            "images": images,
            "annotations": anns,
            "categories": coco.loadCats(coco.getCatIds()),
        }

        with open(save_json, "w") as f:
            json.dump(subset, f, indent=2)

        for img in images:
            src = os.path.join(dataset_dir, "images", img["file_name"])
            dst = os.path.join(save_dir, img["file_name"])
            shutil.copy(src, dst)

    # Save splits
    for subset_ids, subset_name in zip((train_ids, val_ids, test_ids), output_dirs[:3]):
        subset_dir = os.path.join(dataset_dir, subset_name)
        save_subset(subset_ids, subset_dir, os.path.join(subset_dir, ann_filename))

    print(f"Dataset split complete: {len(train_ids)} train, {len(val_ids)} val, {len(test_ids)} test images.")

## Process Ambulance Data

In [12]:
# Inspect which remapped class IDs occur in the ambulance labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\ambulance\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")


Found 8 unique classes
Class distribution:
  Class 0: 32 instances
  Class 1: 64 instances
  Class 2: 149 instances
  Class 3: 1 instances
  Class 4: 56 instances
  Class 5: 338 instances
  Class 6: 8 instances
  Class 7: 133 instances


In [15]:
# Write the COCO annotations JSON for the ambulance subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\ambulance",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\ambulance\_annotations.coco.json


In [None]:
# Split the ambulance subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\ambulance",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

##  Process Auto Rickshaw Data 

In [7]:
# Inspect which remapped class IDs occur in the auto rickshaw labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\auto_rickshaw\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 8 unique classes
Class distribution:
  Class 0: 467 instances
  Class 1: 39 instances
  Class 2: 32 instances
  Class 3: 1 instances
  Class 4: 10 instances
  Class 5: 2 instances
  Class 6: 4 instances
  Class 7: 148 instances


In [26]:
# Write the COCO annotations JSON for the auto rickshaw subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\auto_rickshaw",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\auto_rickshaw\_annotations.coco.json


In [27]:
# Split the auto rickshaw subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\auto_rickshaw",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 208 train, 59 val, 31 test images.


## Process Bikes Data

In [28]:
# Inspect which remapped class IDs occur in the bikes labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\bikes\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 1 unique classes
Class distribution:
  Class 7: 447 instances


In [30]:
# Write the COCO annotations JSON for the bikes subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\bikes",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\bikes\_annotations.coco.json


In [32]:
# Split the bikes subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\bikes",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 216 train, 61 val, 32 test images.


## Process Bus Data

In [34]:
# Inspect which remapped class IDs occur in the bus labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\bus\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 8 unique classes
Class distribution:
  Class 0: 6 instances
  Class 1: 505 instances
  Class 2: 66 instances
  Class 3: 2 instances
  Class 4: 22 instances
  Class 5: 1 instances
  Class 6: 8 instances
  Class 7: 36 instances


In [35]:
# Write the COCO annotations JSON for the bus subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\bus",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\bus\_annotations.coco.json


In [36]:
# Split the bus subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\bus",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 210 train, 60 val, 30 test images.


## Process Car Data

In [8]:
# Inspect which remapped class IDs occur in the car labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\car\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 7 unique classes
Class distribution:
  Class 0: 18 instances
  Class 1: 27 instances
  Class 2: 399 instances
  Class 3: 1 instances
  Class 4: 13 instances
  Class 6: 9 instances
  Class 7: 164 instances


In [9]:
# Write the COCO annotations JSON for the car subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\car",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\car\_annotations.coco.json


In [10]:
# Split the car subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\car",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 213 train, 61 val, 31 test images.


## Process E-Rickshaw Data

In [5]:
# Inspect which remapped class IDs occur in the e-rickshaw labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\e-rickshaw\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 7 unique classes
Class distribution:
  Class 0: 482 instances
  Class 1: 14 instances
  Class 2: 21 instances
  Class 3: 1 instances
  Class 4: 4 instances
  Class 5: 1 instances
  Class 7: 95 instances


In [6]:
# Write the COCO annotations JSON for the e-rickshaw subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\e-rickshaw",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\e-rickshaw\_annotations.coco.json


In [7]:
# Split the e-rickshaw subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\e-rickshaw",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 210 train, 60 val, 30 test images.


## Process Micro-bus Data

In [8]:
# Inspect which remapped class IDs occur in the micro-bus labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\micro_bus\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 10 instances
  Class 1: 358 instances
  Class 2: 87 instances
  Class 4: 14 instances
  Class 6: 2 instances
  Class 7: 161 instances


In [9]:
# Write the COCO annotations JSON for the micro-bus subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\micro_bus",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\micro_bus\_annotations.coco.json


In [10]:
# Split the micro-bus subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\micro_bus",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 210 train, 60 val, 31 test images.


## Process Mini Truck Data

In [11]:
# Inspect which remapped class IDs occur in the mini truck labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\mini_truck\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 9 instances
  Class 1: 57 instances
  Class 2: 116 instances
  Class 4: 348 instances
  Class 6: 14 instances
  Class 7: 256 instances


In [12]:
# Write the COCO annotations JSON for the mini truck subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\mini_truck",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\mini_truck\_annotations.coco.json


In [14]:
# Split the mini truck subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\mini_truck",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 212 train, 60 val, 31 test images.


## Process Police Vehicle Data

In [15]:
# Inspect which remapped class IDs occur in the police vehicle labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\police_vehicle\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 5 unique classes
Class distribution:
  Class 1: 2 instances
  Class 2: 67 instances
  Class 4: 1 instances
  Class 6: 1 instances
  Class 7: 8 instances


In [16]:
# Write the COCO annotations JSON for the police vehicle subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\police_vehicle",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\police_vehicle\_annotations.coco.json


In [17]:
# Split the police vehicle subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\police_vehicle",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 44 train, 12 val, 7 test images.


## Process School Bus Vehicle Data

In [18]:
# Inspect which remapped class IDs occur in the school bus labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\school_bus\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 20 instances
  Class 1: 330 instances
  Class 2: 69 instances
  Class 4: 19 instances
  Class 6: 5 instances
  Class 7: 168 instances


In [19]:
# Write the COCO annotations JSON for the school bus subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\school_bus",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\school_bus\_annotations.coco.json


In [20]:
# Split the school bus subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\school_bus",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 210 train, 60 val, 30 test images.


## Process Scooter Data

In [21]:
# Inspect which remapped class IDs occur in the scooter labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\scooter\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 8 unique classes
Class distribution:
  Class 0: 20 instances
  Class 1: 8 instances
  Class 2: 24 instances
  Class 3: 2 instances
  Class 4: 7 instances
  Class 5: 1 instances
  Class 6: 4 instances
  Class 7: 438 instances


In [22]:
# Write the COCO annotations JSON for the scooter subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\scooter",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\scooter\_annotations.coco.json


In [23]:
# Split the scooter subset into train, valid, and test (70/20/10)
split_coco_dataset(
    r"data\Nepali_vehicle_dataset_reduced\scooter",
    "_annotations.coco.json",
    (0.7, 0.2, 0.1),
    ("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 214 train, 61 val, 31 test images.


## Process SUV Data

In [24]:
# Inspect which remapped class IDs occur in the SUV labels, and how often
labels_dir = r"data\Nepali_vehicle_dataset_reduced\SUV\labels"   # Windows style path
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
for cls in sorted(class_counter):
    count = class_counter[cls]
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 9 instances
  Class 1: 25 instances
  Class 2: 409 instances
  Class 4: 9 instances
  Class 6: 4 instances
  Class 7: 96 instances


In [25]:
# Write the COCO annotations JSON for the SUV subset.
# The position of a name in this list is its remapped class ID.
category_names = [
    "three_wheels", "bus", "car", "tractor",
    "truck", "ambulance", "van", "motorbikes",
]

convert_yolo_to_coco(
    dataset_path=r"data\Nepali_vehicle_dataset_reduced\SUV",
    category_names=category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\SUV\_annotations.coco.json


In [26]:
# Split the SUV dataset into train, valid, and test (70/20/10).
# NOTE(review): no seed is passed; whether the split is reproducible depends
# on split_coco_dataset (defined outside this cell) — confirm.
split_coco_dataset(
    # pathlib picks the host OS separator; str() keeps a plain-string argument.
    dataset_dir=str(Path("data") / "Nepali_vehicle_dataset_reduced" / "SUV"),
    ann_filename="_annotations.coco.json",
    splits=(0.7, 0.2, 0.1),
    output_dirs=("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 224 train, 64 val, 33 test images.


## Process Taxi Data

In [27]:
# Investigate which class IDs occur in the taxi label files of the reduced dataset.
# Build the path from components so the separator matches the host OS
# (on Windows this yields the same backslash-separated string as before).
labels_dir = str(Path("data") / "Nepali_vehicle_dataset_reduced" / "taxi" / "labels")

# count_yolo_classes is defined outside this cell; its result is consumed
# below as a mapping (len() and .items()) of class ID -> instance count.
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
# Sorting the (class ID, count) pairs lists the classes in ascending ID order.
for cls, count in sorted(class_counter.items()):
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 6 instances
  Class 1: 40 instances
  Class 2: 449 instances
  Class 4: 9 instances
  Class 6: 7 instances
  Class 7: 268 instances


In [28]:
# Create the COCO annotations JSON file for the taxi folder.
# The list order follows the new class IDs 0-7 from the remapping table
# (presumably list index == class ID inside convert_yolo_to_coco; confirm).
category_names = [
    "three_wheels",  # 0
    "bus",           # 1
    "car",           # 2
    "tractor",       # 3
    "truck",         # 4
    "ambulance",     # 5
    "van",           # 6
    "motorbikes",    # 7
]

# pathlib picks the host OS separator; str() keeps the argument a plain
# string, exactly what the original call passed on Windows.
convert_yolo_to_coco(
    str(Path("data") / "Nepali_vehicle_dataset_reduced" / "taxi"),
    category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\taxi\_annotations.coco.json


In [29]:
# Split the taxi dataset into train, valid, and test (70/20/10).
# NOTE(review): no seed is passed; whether the split is reproducible depends
# on split_coco_dataset (defined outside this cell) — confirm.
split_coco_dataset(
    # pathlib picks the host OS separator; str() keeps a plain-string argument.
    dataset_dir=str(Path("data") / "Nepali_vehicle_dataset_reduced" / "taxi"),
    ann_filename="_annotations.coco.json",
    splits=(0.7, 0.2, 0.1),
    output_dirs=("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 210 train, 60 val, 30 test images.


## Process Tempo Data

In [30]:
# Investigate which class IDs occur in the tempo label files of the reduced dataset.
# Build the path from components so the separator matches the host OS
# (on Windows this yields the same backslash-separated string as before).
labels_dir = str(Path("data") / "Nepali_vehicle_dataset_reduced" / "tempo" / "labels")

# count_yolo_classes is defined outside this cell; its result is consumed
# below as a mapping (len() and .items()) of class ID -> instance count.
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
# Sorting the (class ID, count) pairs lists the classes in ascending ID order.
for cls, count in sorted(class_counter.items()):
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 355 instances
  Class 1: 48 instances
  Class 2: 133 instances
  Class 4: 6 instances
  Class 6: 13 instances
  Class 7: 229 instances


In [32]:
# Create the COCO annotations JSON file for the tempo folder.
# The list order follows the new class IDs 0-7 from the remapping table
# (presumably list index == class ID inside convert_yolo_to_coco; confirm).
category_names = [
    "three_wheels",  # 0
    "bus",           # 1
    "car",           # 2
    "tractor",       # 3
    "truck",         # 4
    "ambulance",     # 5
    "van",           # 6
    "motorbikes",    # 7
]

# pathlib picks the host OS separator; str() keeps the argument a plain
# string, exactly what the original call passed on Windows.
convert_yolo_to_coco(
    str(Path("data") / "Nepali_vehicle_dataset_reduced" / "tempo"),
    category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\tempo\_annotations.coco.json


In [33]:
# Split the tempo dataset into train, valid, and test (70/20/10).
# NOTE(review): no seed is passed; whether the split is reproducible depends
# on split_coco_dataset (defined outside this cell) — confirm.
split_coco_dataset(
    # pathlib picks the host OS separator; str() keeps a plain-string argument.
    dataset_dir=str(Path("data") / "Nepali_vehicle_dataset_reduced" / "tempo"),
    ann_filename="_annotations.coco.json",
    splits=(0.7, 0.2, 0.1),
    output_dirs=("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 210 train, 60 val, 30 test images.


## Process Tractor Data

In [34]:
# Investigate which class IDs occur in the tractor label files of the reduced dataset.
# Build the path from components so the separator matches the host OS
# (on Windows this yields the same backslash-separated string as before).
labels_dir = str(Path("data") / "Nepali_vehicle_dataset_reduced" / "tractor" / "labels")

# count_yolo_classes is defined outside this cell; its result is consumed
# below as a mapping (len() and .items()) of class ID -> instance count.
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
# Sorting the (class ID, count) pairs lists the classes in ascending ID order.
for cls, count in sorted(class_counter.items()):
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 16 instances
  Class 1: 1 instances
  Class 2: 7 instances
  Class 3: 82 instances
  Class 4: 7 instances
  Class 7: 52 instances


In [35]:
# Create the COCO annotations JSON file for the tractor folder.
# The list order follows the new class IDs 0-7 from the remapping table
# (presumably list index == class ID inside convert_yolo_to_coco; confirm).
category_names = [
    "three_wheels",  # 0
    "bus",           # 1
    "car",           # 2
    "tractor",       # 3
    "truck",         # 4
    "ambulance",     # 5
    "van",           # 6
    "motorbikes",    # 7
]

# pathlib picks the host OS separator; str() keeps the argument a plain
# string, exactly what the original call passed on Windows.
convert_yolo_to_coco(
    str(Path("data") / "Nepali_vehicle_dataset_reduced" / "tractor"),
    category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\tractor\_annotations.coco.json


In [36]:
# Split the tractor dataset into train, valid, and test (70/20/10).
# NOTE(review): no seed is passed; whether the split is reproducible depends
# on split_coco_dataset (defined outside this cell) — confirm.
split_coco_dataset(
    # pathlib picks the host OS separator; str() keeps a plain-string argument.
    dataset_dir=str(Path("data") / "Nepali_vehicle_dataset_reduced" / "tractor"),
    ann_filename="_annotations.coco.json",
    splits=(0.7, 0.2, 0.1),
    output_dirs=("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 55 train, 15 val, 9 test images.


## Process Transport Vehicle Data

In [37]:
# Investigate which class IDs occur in the transport_vehicle label files of the reduced dataset.
# Build the path from components so the separator matches the host OS
# (on Windows this yields the same backslash-separated string as before).
labels_dir = str(
    Path("data") / "Nepali_vehicle_dataset_reduced" / "transport_vehicle" / "labels"
)

# count_yolo_classes is defined outside this cell; its result is consumed
# below as a mapping (len() and .items()) of class ID -> instance count.
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
# Sorting the (class ID, count) pairs lists the classes in ascending ID order.
for cls, count in sorted(class_counter.items()):
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 15 instances
  Class 1: 59 instances
  Class 2: 113 instances
  Class 4: 359 instances
  Class 6: 8 instances
  Class 7: 235 instances


In [38]:
# Create the COCO annotations JSON file for the transport_vehicle folder.
# The list order follows the new class IDs 0-7 from the remapping table
# (presumably list index == class ID inside convert_yolo_to_coco; confirm).
category_names = [
    "three_wheels",  # 0
    "bus",           # 1
    "car",           # 2
    "tractor",       # 3
    "truck",         # 4
    "ambulance",     # 5
    "van",           # 6
    "motorbikes",    # 7
]

# pathlib picks the host OS separator; str() keeps the argument a plain
# string, exactly what the original call passed on Windows.
convert_yolo_to_coco(
    str(Path("data") / "Nepali_vehicle_dataset_reduced" / "transport_vehicle"),
    category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\transport_vehicle\_annotations.coco.json


In [40]:
# Split the transport_vehicle dataset into train, valid, and test (70/20/10).
# NOTE(review): no seed is passed; whether the split is reproducible depends
# on split_coco_dataset (defined outside this cell) — confirm.
split_coco_dataset(
    # pathlib picks the host OS separator; str() keeps a plain-string argument.
    dataset_dir=str(
        Path("data") / "Nepali_vehicle_dataset_reduced" / "transport_vehicle"
    ),
    ann_filename="_annotations.coco.json",
    splits=(0.7, 0.2, 0.1),
    output_dirs=("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 219 train, 62 val, 32 test images.


## Process Truck Data

In [41]:
# Investigate which class IDs occur in the truck label files of the reduced dataset.
# Build the path from components so the separator matches the host OS
# (on Windows this yields the same backslash-separated string as before).
labels_dir = str(Path("data") / "Nepali_vehicle_dataset_reduced" / "truck" / "labels")

# count_yolo_classes is defined outside this cell; its result is consumed
# below as a mapping (len() and .items()) of class ID -> instance count.
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
# Sorting the (class ID, count) pairs lists the classes in ascending ID order.
for cls, count in sorted(class_counter.items()):
    print(f"  Class {cls}: {count} instances")

Found 8 unique classes
Class distribution:
  Class 0: 10 instances
  Class 1: 27 instances
  Class 2: 56 instances
  Class 3: 2 instances
  Class 4: 237 instances
  Class 5: 1 instances
  Class 6: 6 instances
  Class 7: 115 instances


In [42]:
# Create the COCO annotations JSON file for the truck folder.
# The list order follows the new class IDs 0-7 from the remapping table
# (presumably list index == class ID inside convert_yolo_to_coco; confirm).
category_names = [
    "three_wheels",  # 0
    "bus",           # 1
    "car",           # 2
    "tractor",       # 3
    "truck",         # 4
    "ambulance",     # 5
    "van",           # 6
    "motorbikes",    # 7
]

# pathlib picks the host OS separator; str() keeps the argument a plain
# string, exactly what the original call passed on Windows.
convert_yolo_to_coco(
    str(Path("data") / "Nepali_vehicle_dataset_reduced" / "truck"),
    category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\truck\_annotations.coco.json


In [43]:
# Split the truck dataset into train, valid, and test (70/20/10).
# NOTE(review): no seed is passed; whether the split is reproducible depends
# on split_coco_dataset (defined outside this cell) — confirm.
split_coco_dataset(
    # pathlib picks the host OS separator; str() keeps a plain-string argument.
    dataset_dir=str(Path("data") / "Nepali_vehicle_dataset_reduced" / "truck"),
    ann_filename="_annotations.coco.json",
    splits=(0.7, 0.2, 0.1),
    output_dirs=("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 129 train, 37 val, 19 test images.


## Process Van Data

In [44]:
# Investigate which class IDs occur in the van label files of the reduced dataset.
# Build the path from components so the separator matches the host OS
# (on Windows this yields the same backslash-separated string as before).
labels_dir = str(Path("data") / "Nepali_vehicle_dataset_reduced" / "van" / "labels")

# count_yolo_classes is defined outside this cell; its result is consumed
# below as a mapping (len() and .items()) of class ID -> instance count.
class_counter = count_yolo_classes(labels_dir)

print(f"Found {len(class_counter)} unique classes")
print("Class distribution:")
# Sorting the (class ID, count) pairs lists the classes in ascending ID order.
for cls, count in sorted(class_counter.items()):
    print(f"  Class {cls}: {count} instances")

Found 6 unique classes
Class distribution:
  Class 0: 12 instances
  Class 1: 36 instances
  Class 2: 68 instances
  Class 4: 8 instances
  Class 6: 319 instances
  Class 7: 117 instances


In [45]:
# Create the COCO annotations JSON file for the van folder.
# The list order follows the new class IDs 0-7 from the remapping table
# (presumably list index == class ID inside convert_yolo_to_coco; confirm).
category_names = [
    "three_wheels",  # 0
    "bus",           # 1
    "car",           # 2
    "tractor",       # 3
    "truck",         # 4
    "ambulance",     # 5
    "van",           # 6
    "motorbikes",    # 7
]

# pathlib picks the host OS separator; str() keeps the argument a plain
# string, exactly what the original call passed on Windows.
convert_yolo_to_coco(
    str(Path("data") / "Nepali_vehicle_dataset_reduced" / "van"),
    category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\van\_annotations.coco.json


In [47]:
# Create the COCO annotations JSON file for the van folder.
# NOTE(review): this cell repeats the van conversion from the cell directly
# above (In [45]) with the same arguments; it looks like a leftover re-run and
# can probably be removed — confirm before deleting.
# The list order follows the new class IDs 0-7 from the remapping table
# (presumably list index == class ID inside convert_yolo_to_coco; confirm).
category_names = [
    "three_wheels",  # 0
    "bus",           # 1
    "car",           # 2
    "tractor",       # 3
    "truck",         # 4
    "ambulance",     # 5
    "van",           # 6
    "motorbikes",    # 7
]

# pathlib picks the host OS separator; str() keeps the argument a plain
# string, exactly what the original call passed on Windows.
convert_yolo_to_coco(
    str(Path("data") / "Nepali_vehicle_dataset_reduced" / "van"),
    category_names,
)

YOLO → COCO conversion complete! Saved to data\Nepali_vehicle_dataset_reduced\van\_annotations.coco.json


In [48]:
# Split the van dataset into train, valid, and test (70/20/10).
# NOTE(review): no seed is passed; whether the split is reproducible depends
# on split_coco_dataset (defined outside this cell) — confirm.
split_coco_dataset(
    # pathlib picks the host OS separator; str() keeps a plain-string argument.
    dataset_dir=str(Path("data") / "Nepali_vehicle_dataset_reduced" / "van"),
    ann_filename="_annotations.coco.json",
    splits=(0.7, 0.2, 0.1),
    output_dirs=("train", "valid", "test"),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dataset split complete: 210 train, 60 val, 30 test images.
