<a href="https://colab.research.google.com/github/tsaravindh/Python_ML_NN_Projects/blob/main/preprocessing_step.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dataset Preprocessing for YOLO Training

## Convert JSON Annotations to YOLO Format

This block reads bounding box annotations from JSON files and converts them into the YOLO format.

- Parses the label data for each annotated object and keeps only objects of the `weapon` class.
- Extracts bounding box coordinates and image size.
- Transforms each box to a YOLO label line, `class_id x_center y_center width height` (space-separated), with the four box values normalized by the image width and height.
- Saves each annotation as a `.txt` file corresponding to each image.

In [None]:

import os
import json


# Source folder holding one JSON annotation file per image.
json_folder = r"C:\Users\akram\OneDrive\Desktop\saeed\capstone\dataset\val_ann"

# Destination folder that receives the generated YOLO label files.
output_txt_folder = r"C:\Users\akram\OneDrive\Desktop\saeed\capstone\weapon_detection\labels\val"

# Make sure the destination exists; an already-present folder is not an error.
os.makedirs(output_txt_folder, exist_ok=True)

def convert_box_to_yolo(x_min, y_min, x_max, y_max, img_width, img_height):
    """Convert a pixel-space corner box into normalized YOLO box values.

    The two corners may be supplied in either order, and any part of the box
    lying outside the image is clipped to the image border. For positive
    image dimensions every returned value therefore lies in [0, 1].

    Args:
        x_min, y_min: Pixel coordinates of one corner of the box.
        x_max, y_max: Pixel coordinates of the opposite corner.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        Tuple ``(x_center, y_center, width, height)``. The x values are
        divided by ``img_width`` and the y values by ``img_height``.

    Raises:
        ZeroDivisionError: If ``img_width`` or ``img_height`` is 0.
    """
    # Order the corners so width/height can never come out negative.
    left, right = sorted((x_min, x_max))
    top, bottom = sorted((y_min, y_max))

    # Clip every edge to the image so the normalized values stay in range.
    left = min(max(left, 0), img_width)
    right = min(max(right, 0), img_width)
    top = min(max(top, 0), img_height)
    bottom = min(max(bottom, 0), img_height)

    x_center = ((left + right) / 2) / img_width
    y_center = ((top + bottom) / 2) / img_height
    width = (right - left) / img_width
    height = (bottom - top) / img_height
    return x_center, y_center, width, height

# Image extensions that can be embedded in annotation names such as
# "img_001.jpg.json". They are stripped so the label is named "img_001.txt",
# i.e. it shares the image's base name.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')

for filename in os.listdir(json_folder):
    if not filename.endswith(".json"):
        continue

    json_path = os.path.join(json_folder, filename)
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    img_width = data['size']['width']
    img_height = data['size']['height']
    yolo_lines = []

    for obj in data['objects']:
        if obj['classTitle'].lower() != 'weapon':
            continue

        exterior = obj['points']['exterior']
        if len(exterior) < 2:
            # A box needs two corners; report and skip rather than emit a bogus label.
            print(f"Skipping object with fewer than 2 exterior points in {filename}")
            continue

        # Use the extent of all exterior points so the result does not depend
        # on the order in which the corners were stored.
        xs = [point[0] for point in exterior]
        ys = [point[1] for point in exterior]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        x_center, y_center, w, h = convert_box_to_yolo(x_min, y_min, x_max, y_max, img_width, img_height)
        # Class id 0 is the single 'weapon' class.
        yolo_lines.append(f"0 {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

    # Build the label name from the image's base name: drop the trailing
    # ".json" and, if present, the embedded image extension.
    stem = filename[:-len('.json')]
    base, ext = os.path.splitext(stem)
    if ext.lower() in IMAGE_EXTENSIONS:
        stem = base
    txt_filename = stem + '.txt'

    # An image with no weapon objects still gets an (empty) label file.
    with open(os.path.join(output_txt_folder, txt_filename), 'w', encoding='utf-8') as f:
        f.write("\n".join(yolo_lines))

print("Conversion complete. YOLO .txt files saved to:", output_txt_folder)


Conversion complete. YOLO .txt files saved to: C:\Users\akram\OneDrive\Desktop\saeed\capstone\weapon_detection\labels\val


## Rename Label Files

Renames label files that were mistakenly named with `.jpg.txt` extensions to just `.txt`.

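- Ensures compatibility with YOLO training, which looks for a label file that shares the image's base name (`image.jpg` → `image.txt`); a file named `image.jpg.txt` would not be matched to its image.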
- Strips the extra `.jpg` from filenames in the `labels/train` directory; point `folder_path` at another split (e.g. `labels/val`) to clean it as well.

In [None]:

folder_path = r"C:\Users\akram\OneDrive\Desktop\saeed\capstone\weapon_detection\labels\train"

# Suffix carried by the mis-named label files, e.g. "img_001.jpg.txt".
bad_suffix = '.jpg.txt'

# Loop through all files in the folder
for filename in os.listdir(folder_path):
    if not filename.endswith(bad_suffix):
        continue

    # Replace only the trailing suffix; str.replace would also rewrite an
    # earlier occurrence of '.jpg.txt' inside the name.
    new_filename = filename[:-len(bad_suffix)] + '.txt'

    # Full paths for renaming
    old_path = os.path.join(folder_path, filename)
    new_path = os.path.join(folder_path, new_filename)

    # os.rename raises on Windows and silently overwrites on POSIX when the
    # target exists, so keep the existing label and report the collision.
    if os.path.exists(new_path):
        print(f"Skipping {filename}: {new_filename} already exists")
        continue

    os.rename(old_path, new_path)


