In [8]:
from pathlib import Path
import os

# Where we will store YOLO-ready data
YOLO_DIR = Path(r"D:\MachineLearingProject\YOLODataset")

# Create folders
for sub in ["images/train", "images/val", "labels/train", "labels/val"]:
    path = YOLO_DIR / sub
    path.mkdir(parents=True, exist_ok=True)

YOLO_DIR


WindowsPath('D:/MachineLearingProject/YOLODataset')

In [9]:
BASE_DIR = Path(r"D:\MachineLearingProject\RDD2022")

# Get country folders again
countries = [d for d in BASE_DIR.iterdir() if d.is_dir()]

countries

[WindowsPath('D:/MachineLearingProject/RDD2022/China_Drone'),
 WindowsPath('D:/MachineLearingProject/RDD2022/China_MotorBike'),
 WindowsPath('D:/MachineLearingProject/RDD2022/Czech'),
 WindowsPath('D:/MachineLearingProject/RDD2022/India'),
 WindowsPath('D:/MachineLearingProject/RDD2022/Japan'),
 WindowsPath('D:/MachineLearingProject/RDD2022/Norway'),
 WindowsPath('D:/MachineLearingProject/RDD2022/United_States')]

In [10]:
# Mapping VOC damage labels → YOLO class IDs
CLASS_MAPPING = {
    "D00": 0,
    "D10": 1,
    "D20": 2,
    "D40": 3,
    "D44": 4
}

CLASS_MAPPING


{'D00': 0, 'D10': 1, 'D20': 2, 'D40': 3, 'D44': 4}

In [11]:
import xml.etree.ElementTree as ET
import shutil
import cv2

def convert_xml_to_yolo(xml_file, img_file, label_output_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Image size from XML
    img_width = int(root.find("size/width").text)
    img_height = int(root.find("size/height").text)

    lines = []

    for obj in root.findall("object"):
        label = obj.find("name").text
        
        if label not in CLASS_MAPPING:
            continue   # skip unknown classes
        
        class_id = CLASS_MAPPING[label]

        bbox = obj.find("bndbox")
        
        xmin = float(bbox.find("xmin").text)
        ymin = float(bbox.find("ymin").text)
        xmax = float(bbox.find("xmax").text)
        ymax = float(bbox.find("ymax").text)

        # Convert to YOLO format
        x_center = (xmin + xmax) / 2 / img_width
        y_center = (ymin + ymax) / 2 / img_height
        width = (xmax - xmin) / img_width
        height = (ymax - ymin) / img_height

        lines.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    # Write YOLO TXT file
    with open(label_output_file, "w") as f:
        f.write("\n".join(lines))


In [12]:
import random
random.seed(42)

def split_data(file_list, train_ratio=0.9):
    random.shuffle(file_list)
    train_count = int(len(file_list) * train_ratio)
    return file_list[:train_count], file_list[train_count:]


In [7]:
def process_country(country):
    print("Processing:", country)

    image_dir = BASE_DIR / country / "train" / "images"
    xml_dir   = BASE_DIR / country / "train" / "annotations" / "xmls"

    image_files = list(image_dir.glob("*.jpg"))

    train_files, val_files = split_data(image_files, train_ratio=0.9)

    # Process train files
    for img_file in train_files:
        xml_file = xml_dir / (img_file.stem + ".xml")
        label_file = YOLO_DIR / "labels/train" / (img_file.stem + ".txt")
        out_img_file = YOLO_DIR / "images/train" / img_file.name

        shutil.copy(img_file, out_img_file)
        convert_xml_to_yolo(xml_file, img_file, label_file)

    # Process val files
    for img_file in val_files:
        xml_file = xml_dir / (img_file.stem + ".xml")
        label_file = YOLO_DIR / "labels/val" / (img_file.stem + ".txt")
        out_img_file = YOLO_DIR / "images/val" / img_file.name

        shutil.copy(img_file, out_img_file)
        convert_xml_to_yolo(xml_file, img_file, label_file)

for country_dir in countries:
    process_country(country_dir.name)

print("YOLO dataset created successfully!")


Processing: China_Drone
Processing: China_MotorBike
Processing: Czech
Processing: India
Processing: Japan
Processing: Norway
Processing: United_States
YOLO dataset created successfully!


In [13]:
yaml_content = f"""
train: {YOLO_DIR}/images/train
val: {YOLO_DIR}/images/val

nc: 5
names: ['D00', 'D10', 'D20', 'D40', 'D44']
"""

with open(YOLO_DIR / "dataset.yaml", "w") as f:
    f.write(yaml_content)

print("dataset.yaml created!")


dataset.yaml created!


ModuleNotFoundError: No module named 'ultralytics'

In [16]:
pip install ultralytics


Collecting ultralytics
  Using cached ultralytics-8.3.228-py3-none-any.whl.metadata (37 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Using cached torch-2.9.1-cp313-cp313-win_amd64.whl.metadata (30 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Using cached torchvision-0.24.1-cp313-cp313-win_amd64.whl.metadata (5.9 kB)
Collecting polars (from ultralytics)
  Using cached polars-1.35.2-py3-none-any.whl.metadata (10 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Using cached ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Collecting filelock (from torch>=1.8.0->ultralytics)
  Using cached filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting networkx>=2.5.1 (from torch>=1.8.0->ultralytics)
  Using cached networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec>=0.8.5 (from torch>=1.8.0->ultralytics)
  Using cached fsspec-2025.10.0-py3-none-any.whl.metadata (10 kB)
Collecting polars-runtime-32==1.35.2 (from polars->ultralytics)
  Using ca


[notice] A new release of pip is available: 24.2 -> 25.3
[notice] To update, run: C:\App\python.exe -m pip install --upgrade pip


In [17]:
import ultralytics
ultralytics.checks()


Ultralytics 8.3.228  Python-3.13.0 torch-2.9.1+cpu CPU (Intel Core i7-9750H 2.60GHz)
Setup complete  (12 CPUs, 31.7 GB RAM, 336.0/1863.0 GB disk)


In [18]:
from ultralytics import YOLO

# Load YOLO Nano model (fastest)
model = YOLO("yolov8n.pt")

# Train the model
model.train(
    data=r"D:\MachineLearingProject\YOLODataset\dataset.yaml",
    epochs=5,
    imgsz=384,
    batch=4,
    name="road_damage_detector"
)


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 27.3MB/s 0.2s.2s<0.0s.8s
Ultralytics 8.3.228  Python-3.13.0 torch-2.9.1+cpu CPU (Intel Core i7-9750H 2.60GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\MachineLearingProject\YOLODataset\dataset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, m

KeyboardInterrupt: 

In [1]:
from ultralytics import YOLO

model = YOLO("runs/detect/road_damage_detector/weights/last.pt")

model.train(
    data=r"D:\MachineLearingProject\YOLODataset\dataset.yaml",
    resume=True
)

Ultralytics 8.3.228  Python-3.13.0 torch-2.9.1+cpu CPU (Intel Core i7-9750H 2.60GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\MachineLearingProject\YOLODataset\dataset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=runs\detect\road_damage_detector\weights\last.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=road_damage_detector, nbs=64, nms=False, opset=None, optimize=Fals

KeyboardInterrupt: 

In [2]:
model.train(
    data=r"D:\MachineLearingProject\YOLODataset\dataset.yaml",
    epochs=5,
    imgsz=384,
    batch=4,
    resume=True,
    name="road_damage_cpu_fast"
)

Ultralytics 8.3.228  Python-3.13.0 torch-2.9.1+cpu CPU (Intel Core i7-9750H 2.60GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\MachineLearingProject\YOLODataset\dataset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=384, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=runs\detect\road_damage_detector\weights\last.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=road_damage_detector, nbs=64, nms=False, opset=None, optimize=Fals

KeyboardInterrupt: 

In [None]:
from ultralytics import YOLO

# Load the last checkpoint as pretrained weights
model = YOLO("runs/detect/road_damage_detector/weights/last.pt")

# Start a NEW training run using these weights
model.train(
    data=r"D:\MachineLearingProject\YOLODataset\dataset.yaml",
    epochs=5,
    imgsz=384,
    batch=4,
    name="road_damage_cpu_fast"
)



Ultralytics 8.3.228  Python-3.13.0 torch-2.9.1+cpu CPU (Intel Core i7-9750H 2.60GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\MachineLearingProject\YOLODataset\dataset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=5, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=384, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=runs/detect/road_damage_detector/weights/last.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=road_damage_cpu_fast, nbs=64, nms=False, opset=None, optimize=False

In [2]:
from ultralytics import YOLO

model = YOLO(
    r"D:\MachineLearingProject\runs\detect\road_damage_cpu_fast\weights\best.pt"
)

print("Model loaded successfully")


Model loaded successfully


In [9]:
results = model.predict(
    source=r"D:\MachineLearingProject\test_images\road2.jpg",
    save=True,
    conf=0.25
)


image 1/1 D:\MachineLearingProject\test_images\road2.jpg: 384x384 (no detections), 40.2ms
Speed: 1.6ms preprocess, 40.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 384)
Results saved to [1mD:\MachineLearingProject\runs\detect\predict[0m


In [15]:
from ultralytics import YOLO

model = YOLO(r"D:\MachineLearingProject\runs\detect\road_damage_cpu_fast\weights\best.pt")

results = model.predict(
    source=r"D:\MachineLearingProject\test_images\road2.jpg",
    save=True,
    conf=0.10    # lower confidence for better detection
)


image 1/1 D:\MachineLearingProject\test_images\road2.jpg: 384x384 1 D10, 1 D20, 47.7ms
Speed: 2.3ms preprocess, 47.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 384)
Results saved to [1mD:\MachineLearingProject\runs\detect\predict5[0m


In [20]:
results = model.predict(
    source=r"D:\MachineLearingProject\test_images\road2.jpg",
    save=True,
    conf=0.10
)



image 1/1 D:\MachineLearingProject\test_images\road2.jpg: 384x384 1 D10, 1 D20, 115.6ms
Speed: 2.8ms preprocess, 115.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 384)
Results saved to [1mD:\MachineLearingProject\runs\detect\predict5[0m


In [2]:
model.predict(
    source=r"D:\MachineLearingProject\test_images\road3.webp",
    save=True,
    conf=0.10
)



image 1/1 D:\MachineLearingProject\test_images\road3.webp: 256x384 1 D44, 92.9ms
Speed: 4.4ms preprocess, 92.9ms inference, 10.9ms postprocess per image at shape (1, 3, 256, 384)
Results saved to [1mD:\MachineLearingProject\runs\detect\predict6[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'D00', 1: 'D10', 2: 'D20', 3: 'D40', 4: 'D44'}
 obb: None
 orig_img: array([[[ 14,  16,  11],
         [ 14,  16,  11],
         [ 14,  16,  11],
         ...,
         [ 13,  14,  11],
         [ 13,  14,  11],
         [ 13,  14,  11]],
 
        [[ 14,  16,  11],
         [ 14,  16,  11],
         [ 14,  16,  11],
         ...,
         [ 10,  11,   9],
         [ 10,  11,   9],
         [ 10,  11,   9]],
 
        [[ 14,  16,  11],
         [ 14,  16,  11],
         [ 14,  16,  11],
         ...,
         [ 10,  11,   9],
         [ 10,  11,   9],
         [ 10,  11,   9]],
 
        ...,
 
        [[ 87,  78,  63],
         [101,  92,  77],
         [121, 111,  94],
         ...,
         [124, 119, 100],
         [131, 126, 107],
         [143, 138, 118]],
 
        [[ 89,  78,  62],
         [106,  94,  75],
         [129, 115,  

In [4]:
model.predict(
    source=r"D:\MachineLearingProject\samples\road2.jpg",
    save=True,
    conf=0.1
)


image 1/1 D:\MachineLearingProject\samples\road2.jpg: 384x384 1 D10, 1 D20, 50.5ms
Speed: 3.7ms preprocess, 50.5ms inference, 4.6ms postprocess per image at shape (1, 3, 384, 384)
Results saved to [1mD:\MachineLearingProject\notebooks\runs\detect\predict[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'D00', 1: 'D10', 2: 'D20', 3: 'D40', 4: 'D44'}
 obb: None
 orig_img: array([[[223, 228, 227],
         [227, 232, 231],
         [224, 229, 228],
         ...,
         [217, 213, 212],
         [218, 214, 213],
         [216, 212, 211]],
 
        [[224, 229, 228],
         [226, 231, 230],
         [222, 227, 226],
         ...,
         [216, 212, 211],
         [217, 213, 212],
         [215, 211, 210]],
 
        [[224, 229, 228],
         [224, 229, 228],
         [219, 224, 223],
         ...,
         [215, 211, 210],
         [217, 213, 212],
         [215, 211, 210]],
 
        ...,
 
        [[142, 130, 124],
         [143, 131, 125],
         [144, 132, 126],
         ...,
         [145, 133, 127],
         [141, 129, 123],
         [139, 127, 121]],
 
        [[146, 134, 128],
         [146, 134, 128],
         [147, 135, 1

In [5]:
results = model.predict(
    source=r"D:\MachineLearingProject\samples\road2.jpg",
    conf=0.1,
    save=True
)

print(results[0].boxes.cls)   # class IDs
print(results[0].boxes.conf)  # confidence scor


image 1/1 D:\MachineLearingProject\samples\road2.jpg: 384x384 1 D10, 1 D20, 32.0ms
Speed: 1.7ms preprocess, 32.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 384)
Results saved to [1mD:\MachineLearingProject\notebooks\runs\detect\predict[0m
tensor([1., 2.])
tensor([0.1616, 0.1023])
