In [7]:
import os
import json
import cv2
import numpy as np
from tqdm import tqdm

In [8]:
# Filesystem layout: raw TuSimple training data in, processed artifacts out.
DATASET_DIR = "archive/TUSimple/train_set/"

# Everything this notebook writes lives under one root folder.
_PROCESSED_ROOT = "processed/"
OUTPUT_IMG_DIR = _PROCESSED_ROOT + "images/"        # resized images
OUTPUT_MASK_DIR = _PROCESSED_ROOT + "masks/"        # lane masks
ANNOTATION_FILE = _PROCESSED_ROOT + "annotations.json"  # image/mask path index

In [9]:
# Ensure both output folders exist; exist_ok makes re-running this cell harmless.
for out_dir in (OUTPUT_IMG_DIR, OUTPUT_MASK_DIR):
    os.makedirs(out_dir, exist_ok=True)


In [10]:
# Label files to process, in order. Each name is "label_data_<tag>.json"
# and is resolved relative to DATASET_DIR when opened.
LABEL_FILES = [
    f"label_data_{tag}.json"
    for tag in ("0313", "0531", "0601")
]

In [12]:
# Resolution (in pixels) that every saved image and mask is resized to.
IMG_WIDTH = 512
IMG_HEIGHT = 288

# Index of saved image/mask path pairs; filled in by the processing loop.
processed_annotations = list()

print("Preprocessing and saving images + masks...")

Preprocessing and saving images + masks...


In [13]:
# Convert every labelled sample into a resized image plus a binary lane mask.
#
# Both accumulators are reset here (not only `idx`) so that re-running this
# cell on its own does not append duplicate entries to the annotation index.
idx = 0
processed_annotations = []
skipped = 0  # samples whose image file could not be read

for label_file in LABEL_FILES:
    label_path = os.path.join(DATASET_DIR, label_file)
    print(f"\n📂 Processing {label_file}...")

    # Each label file holds one JSON object per line; ignore blank lines so a
    # trailing newline does not crash json.loads.
    with open(label_path, "r", encoding="utf-8") as f:
        labels = [json.loads(line) for line in f if line.strip()]

    for sample in tqdm(labels):
        img_path = os.path.join(DATASET_DIR, sample['raw_file'])
        lanes = sample['lanes']          # one list of x coordinates per lane
        h_samples = sample['h_samples']  # y coordinates shared by all lanes

        # Load image. cv2.imread returns None (it does not raise) on failure,
        # so count the miss instead of dropping the sample silently.
        img = cv2.imread(img_path)
        if img is None:
            skipped += 1
            continue

        # Scale factors must come from the real image size, measured BEFORE
        # resizing; relying on optional "height"/"width" label keys with a
        # 1280x720 fallback mis-scales lanes for any other resolution.
        orig_h, orig_w = img.shape[:2]
        scale_x, scale_y = IMG_WIDTH / orig_w, IMG_HEIGHT / orig_h

        # Keep the in-memory image in RGB (as before); it is converted back to
        # BGR only at write time because cv2.imwrite expects BGR.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))

        # Create blank single-channel mask (0 = background, 255 = lane)
        mask = np.zeros((IMG_HEIGHT, IMG_WIDTH), dtype=np.uint8)

        # Draw lanes on mask
        for lane in lanes:
            # Entries with x == -2 are skipped (presumably the dataset's
            # "no lane at this row" marker — confirm against the label spec).
            points = [(int(x * scale_x), int(y * scale_y))
                      for x, y in zip(lane, h_samples) if x != -2]
            if len(points) > 1:
                # cv2.polylines only accepts 32-bit integer points; NumPy's
                # default integer type is 64-bit on many platforms, so the
                # dtype has to be forced explicitly.
                cv2.polylines(mask, [np.array(points, dtype=np.int32)],
                              isClosed=False, color=255, thickness=5)

        # Save processed image + mask under the same zero-padded file name
        img_filename = f"{idx:06d}.png"
        mask_filename = f"{idx:06d}.png"

        cv2.imwrite(os.path.join(OUTPUT_IMG_DIR, img_filename), cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        cv2.imwrite(os.path.join(OUTPUT_MASK_DIR, mask_filename), mask)

        processed_annotations.append({
            "image": os.path.join(OUTPUT_IMG_DIR, img_filename),
            "mask": os.path.join(OUTPUT_MASK_DIR, mask_filename)
        })

        idx += 1

if skipped:
    print(f"\n⚠️ Skipped {skipped} samples whose image could not be read.")



📂 Processing label_data_0313.json...


100%|██████████| 2858/2858 [01:10<00:00, 40.79it/s]



📂 Processing label_data_0531.json...


100%|██████████| 358/358 [00:09<00:00, 38.36it/s]



📂 Processing label_data_0601.json...


100%|██████████| 410/410 [00:10<00:00, 39.54it/s]


In [14]:
# Save annotation index
# Create the target folder explicitly rather than relying on it existing as a
# side effect of the image/mask folders having been created.
annotation_dir = os.path.dirname(ANNOTATION_FILE)
if annotation_dir:
    os.makedirs(annotation_dir, exist_ok=True)

with open(ANNOTATION_FILE, "w", encoding="utf-8") as f:
    json.dump(processed_annotations, f, indent=2)

# Report the real number of label files instead of a hard-coded "3" so the
# message stays correct when LABEL_FILES is edited.
print(f"\n✅ Preprocessing complete. Saved {len(processed_annotations)} images + masks from all {len(LABEL_FILES)} label files.")



✅ Preprocessing complete. Saved 3626 images + masks from all 3 label files.
