BDD_100K Dataset Overview

In [13]:
import os
import json

#bdd_path = "/Volumes/DDH/Pitts/oinauto/wk1/BDD/"
bdd_path = "./BDD/"
if "bdd_json_files" in globals():
    del bdd_json_files
bdd_json_files = []

# Recursively search for all JSON files
for root, _, files in os.walk(bdd_path):
    for file in files:
        if file.endswith(".json"):
            bdd_json_files.append(os.path.join(root, file))

print(f"Total {len(bdd_json_files)} JSON files found")

Total 1601 JSON files found


1.1 BDD Dataset Statistics

In [14]:
bdd_data = []

# Iterate through all found JSON files
for file in bdd_json_files:
    with open(file, "r") as f:
        try:
            data = json.load(f)
            bdd_data.extend(data)
        except json.JSONDecodeError:
            print(f"Failed to parse {file}, possibly due to format errors")  

print(f"Successfully loaded {len(bdd_data)} annotation entries")  

Successfully loaded 318255 annotation entries


1.2 BDD Dataset Structure Exploration

In [None]:
sample_file = "./BDD/box_track_labels_trainval/bdd100k/labels/box_track_20/train/0000f77c-6257be58.json"

with open(sample_file, "r") as f:
    sample_data = json.load(f)

print(json.dumps(sample_data[0], indent=4))

1.3 BDD Label Analysis

In [None]:
label_types = set()

for item in sample_data:
    for label in item.get("labels", []):
        label_types.update(label.keys())

print(f"Unique label types found: {label_types}")

1.4 Extract Object Detection Labels (box2d)

In [None]:
import pandas as pd

bdd_data_list = []

for item in bdd_data:
    bdd_image_name = item.get("name", "Unknown")  
    for obj in item.get("labels", []):  
        if "box2d" in obj:  
            x1, y1, x2, y2 = obj["box2d"].values()  # Extract bounding box coordinates 
            bdd_data_list.append([bdd_image_name, obj["category"], x1, y1, x2, y2])

df = pd.DataFrame(bdd_data_list, columns=["Image", "Category", "x1", "y1", "x2", "y2"])

print(df.head())

1.5 Object Detection Category Distribution

In [None]:
bdd_category_counts = df["Category"].value_counts()

print(bdd_category_counts.head())

1.6 Visualization of Object Detection Categories

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))

ax = sns.barplot(x=bdd_category_counts.index, y=bdd_category_counts.values, palette="coolwarm")

# Add text annotations on top of each bar
for i, value in enumerate(bdd_category_counts.values):
    ax.text(i, value + 5, str(value), ha='center', fontsize=12)

plt.xlabel("Category")
plt.ylabel("Number of Annotations")
plt.title("BDD Object Detection Category Distribution")

plt.xticks(rotation=45, fontsize=14)

plt.show()

1.7 Visualizing Bounding Boxes

In [None]:
import os
import json
import cv2
import matplotlib.pyplot as plt

sample_img_name = "0000f77c-6257be58-0000001.jpg"
image_folder = "./BDD/track_images_train/bdd100k/images/track/train/0000f77c-6257be58/"

with open(json_file, "r") as f:
    sample_json_data = json.load(f)

target_img_path = os.path.join(image_folder, sample_img_name)

if not os.path.exists(target_img_path):
    print(f"Image not found: {target_img_path}")
    exit()

image = cv2.imread(target_img_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

for obj in sample_json_data[0]["labels"]:  # Process first frame
    if "box2d" in obj:
        x1, y1, x2, y2 = int(obj["box2d"]["x1"]), int(obj["box2d"]["y1"]), int(obj["box2d"]["x2"]), int(obj["box2d"]["y2"])
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)  # Blue box
        cv2.putText(image, obj["category"], (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 5, (255, 0, 0), 2)

plt.figure(figsize=(8, 6))
plt.imshow(image)
plt.axis("off")
plt.show()

In [None]:
import json

json_path = "./BDD/box_track_labels_trainval/bdd100k/labels/box_track_20/train/00a0f008-3c67908e.json"

with open(json_path, "r") as f:
    data = json.load(f)

print(json.dumps(data[:2], indent=4)) 

In [None]:
BDD Resize

In [None]:
import os
from PIL import Image

bdd_path = "./BDD/"

image_folders = [
    "track_images_train", 
    "track_images_val", 
    "track_images_test"
]

output_root_416 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/"
output_root_512 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_512/"

target_sizes = {
    "416": (416, 416),
    "512": (512, 512)
}

for folder in image_folders:
    original_image_folder = os.path.join(bdd_path, folder)
    output_folder_416 = os.path.join(output_root_416, folder)
    output_folder_512 = os.path.join(output_root_512, folder)

    os.makedirs(output_folder_416, exist_ok=True)
    os.makedirs(output_folder_512, exist_ok=True)

    for root, _, files in os.walk(original_image_folder):
        for file in files:
            if file.endswith((".jpg", ".png")):  
                img_path = os.path.join(root, file)

                relative_path = os.path.relpath(root, original_image_folder)
                save_path_416 = os.path.join(output_folder_416, relative_path)
                save_path_512 = os.path.join(output_folder_512, relative_path)

                os.makedirs(save_path_416, exist_ok=True)
                os.makedirs(save_path_512, exist_ok=True)

                try:
                    with Image.open(img_path) as img:
                        img_resized_416 = img.resize(target_sizes["416"], Image.LANCZOS)
                        img_resized_416.save(os.path.join(save_path_416, file))

                        img_resized_512 = img.resize(target_sizes["512"], Image.LANCZOS)
                        img_resized_512.save(os.path.join(save_path_512, file))

                        print(f"Resized & Saved: {file} in {folder}/{relative_path}")
                except Exception as e:
                    print(f"Error processing {file}: {e}")

print("All images from train, val, and test sets resized successfully!")

Resized & Saved: 02ddd956-3bfcf83e-0000049.jpg in track_images_train/bdd100k/images/track/train/02ddd956-3bfcf83e
Resized & Saved: 02ddd956-3bfcf83e-0000061.jpg in track_images_train/bdd100k/images/track/train/02ddd956-3bfcf83e
Resized & Saved: 02ddd956-3bfcf83e-0000075.jpg in track_images_train/bdd100k/images/track/train/02ddd956-3bfcf83e
Resized & Saved: 02ddd956-3bfcf83e-0000129.jpg in track_images_train/bdd100k/images/track/train/02ddd956-3bfcf83e
Resized & Saved: 02ddd956-3bfcf83e-0000115.jpg in track_images_train/bdd100k/images/track/train/02ddd956-3bfcf83e
Resized & Saved: 02ddd956-3bfcf83e-0000101.jpg in track_images_train/bdd100k/images/track/train/02ddd956-3bfcf83e
Resized & Saved: 02ddd956-3bfcf83e-0000100.jpg in track_images_train/bdd100k/images/track/train/02ddd956-3bfcf83e
Resized & Saved: 02ddd956-3bfcf83e-0000114.jpg in track_images_train/bdd100k/images/track/train/02ddd956-3bfcf83e
Resized & Saved: 02ddd956-3bfcf83e-0000128.jpg in track_images_train/bdd100k/images/trac

In [7]:
import os
 
original_folders = [
    "./BDD/track_images_train",
    "./BDD/track_images_val",
    "./BDD/track_images_test"
]
 
output_folder_416 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/"
output_folder_512 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_512/"

# Dictionary to store original images (key: filename, value: full path)
original_images_dict = {}

# Traverse original dataset and store image paths
for folder in original_folders:
    for root, _, files in os.walk(folder):
        for file in files:
            if file.endswith((".jpg", ".png")):
                original_images_dict[file] = os.path.join(root, file)
 
resized_images_416_set = set()
resized_images_512_set = set()

for root, _, files in os.walk(output_folder_416):
    for file in files:
        resized_images_416_set.add(file)

for root, _, files in os.walk(output_folder_512):
    for file in files:
        resized_images_512_set.add(file)
 
missing_416 = {file: path for file, path in original_images_dict.items() if file not in resized_images_416_set}
missing_512 = {file: path for file, path in original_images_dict.items() if file not in resized_images_512_set}

# Save missing images to files with full paths
with open("missing_images_416.txt", "w") as f:
    for file, path in missing_416.items():
        f.write(f"{file},{path}\n")

with open("missing_images_512.txt", "w") as f:
    for file, path in missing_512.items():
        f.write(f"{file},{path}\n")
 
print(f"Total Original Images: {len(original_images_dict)}")
print(f"Resized 416x416 Images: {len(resized_images_416_set)}")
print(f"Resized 512x512 Images: {len(resized_images_512_set)}")
print(f"{len(missing_416)} images were not resized to 416x416. Saved in missing_images_416.txt")
print(f"{len(missing_512)} images were not resized to 512x512. Saved in missing_images_512.txt")
 
if not missing_416 and not missing_512:
    print("All images have been successfully resized!")
else:
    print("Some images were not resized. Check missing images files.")

Total Original Images: 397688
Resized 416x416 Images: 395967
Resized 512x512 Images: 395967
1721 images were not resized to 416x416. Saved in missing_images_416.txt
1721 images were not resized to 512x512. Saved in missing_images_512.txt
Some images were not resized. Check missing images files.


In [6]:
import os

original_folders = [
    "./BDD/track_images_train",
    "./BDD/track_images_val",
    "./BDD/track_images_test"
]

output_folders_416 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/"
output_folders_512 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_512/"

original_images_dict = {}  # key: file name, value: full path
for folder in original_folders:
    for root, _, files in os.walk(folder):
        for file in files:
            if file.endswith((".jpg", ".png")):
                original_images_dict[file] = os.path.join(root, file)

resized_images_416_set = set(os.listdir(output_folders_416))
resized_images_512_set = set(os.listdir(output_folders_512))

missing_416 = {file: path for file, path in original_images_dict.items() if file not in resized_images_416_set}
missing_512 = {file: path for file, path in original_images_dict.items() if file not in resized_images_512_set}

with open("missing_images_416.txt", "w") as f:
    for file, path in missing_416.items():
        f.write(f"{file},{path}\n")  

with open("missing_images_512.txt", "w") as f:
    for file, path in missing_512.items():
        f.write(f"{file},{path}\n") 

print(f"{len(missing_416)} images were not resized to 416x416. Saved in missing_images_416.txt")
print(f"{len(missing_512)} images were not resized to 512x512. Saved in missing_images_512.txt")

397688 images were not resized to 416x416. Saved in missing_images_416.txt
397688 images were not resized to 512x512. Saved in missing_images_512.txt


In [1]:
import os
from PIL import Image

target_sizes = {
    "416": (416, 416),
    "512": (512, 512)
}
 
output_root_416 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/"
output_root_512 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_512/"

# Function to process and resize images
def resize_image(image_path, output_path, size):
    try:
        with Image.open(image_path) as img:
            img_resized = img.resize(size, Image.LANCZOS)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)  # Ensure output directory exists
            img_resized.save(output_path)
            return f"Resized: {image_path} -> {output_path}"
    except Exception as e:
        return f"Error processing {image_path}: {e}"

# Function to process missing images using correct paths
def process_missing_images(missing_file, output_folder, size):
    with open(missing_file, "r") as f:
        missing_images = [line.strip().split(",", 1) for line in f.readlines()]  # Split only at the first comma

    for file_name, original_path in missing_images:
        original_path = original_path.strip()  # Remove any trailing spaces

        relative_path = os.path.relpath(original_path, "./BDD/")
        output_path = os.path.join(output_folder, relative_path)
 
        result = resize_image(original_path, output_path, size)
        print(result)

print("Processing missing 416x416 images...")
process_missing_images("missing_images_416.txt", output_root_416, target_sizes["416"])

print("Processing missing 512x512 images...")
process_missing_images("missing_images_512.txt", output_root_512, target_sizes["512"])

print("Missing image resizing completed!")

Processing missing 416x416 images...
Resized: ./BDD/track_images_test/bdd100k/images/track/test/cbbc1915-6292d4cb/cbbc1915-6292d4cb-0000123.jpg -> /Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/track_images_test/bdd100k/images/track/test/cbbc1915-6292d4cb/cbbc1915-6292d4cb-0000123.jpg
Resized: ./BDD/track_images_test/bdd100k/images/track/test/cbbc1915-6292d4cb/cbbc1915-6292d4cb-0000137.jpg -> /Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/track_images_test/bdd100k/images/track/test/cbbc1915-6292d4cb/cbbc1915-6292d4cb-0000137.jpg
Resized: ./BDD/track_images_test/bdd100k/images/track/test/cbbc1915-6292d4cb/cbbc1915-6292d4cb-0000136.jpg -> /Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/track_images_test/bdd100k/images/track/test/cbbc1915-6292d4cb/cbbc1915-6292d4cb-0000136.jpg
Resized: ./BDD/track_images_test/bdd100k/images/track/test/cbbc1915-6292d4cb/cbbc1915-6292d4cb-0000122.jpg -> /Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/track_images_test/bdd100k/images/track/test/cbbc1915-6292d4

In [2]:
import os
 
original_folders = [
    "./BDD/track_images_train",
    "./BDD/track_images_val",
    "./BDD/track_images_test"
]
 
output_folder_416 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_416/"
output_folder_512 = "/Volumes/DDH/Pitts/oinauto/wk2/BDD_resized_512/"

# Dictionary to store original images (key: filename, value: full path)
original_images_dict = {}

# Traverse original dataset and store image paths
for folder in original_folders:
    for root, _, files in os.walk(folder):
        for file in files:
            if file.endswith((".jpg", ".png")):
                original_images_dict[file] = os.path.join(root, file)
 
resized_images_416_set = set()
resized_images_512_set = set()

for root, _, files in os.walk(output_folder_416):
    for file in files:
        resized_images_416_set.add(file)

for root, _, files in os.walk(output_folder_512):
    for file in files:
        resized_images_512_set.add(file)
 
missing_416 = {file: path for file, path in original_images_dict.items() if file not in resized_images_416_set}
missing_512 = {file: path for file, path in original_images_dict.items() if file not in resized_images_512_set}

# Save missing images to files with full paths
with open("missing_images_416.txt", "w") as f:
    for file, path in missing_416.items():
        f.write(f"{file},{path}\n")

with open("missing_images_512.txt", "w") as f:
    for file, path in missing_512.items():
        f.write(f"{file},{path}\n")
 
print(f"Total Original Images: {len(original_images_dict)}")
print(f"Resized 416x416 Images: {len(resized_images_416_set)}")
print(f"Resized 512x512 Images: {len(resized_images_512_set)}")
print(f"{len(missing_416)} images were not resized to 416x416. Saved in missing_images_416.txt")
print(f"{len(missing_512)} images were not resized to 512x512. Saved in missing_images_512.txt")
 
if not missing_416 and not missing_512:
    print("All images have been successfully resized!")
else:
    print("Some images were not resized. Check missing images files.")

Total Original Images: 397688
Resized 416x416 Images: 397688
Resized 512x512 Images: 397688
0 images were not resized to 416x416. Saved in missing_images_416.txt
0 images were not resized to 512x512. Saved in missing_images_512.txt
All images have been successfully resized!
