### Converting labels' index


- If the COCO dataset and our dataset use different label indices (e.g. COCO has 0 for knife, while ours has 0 for spoon), we can use this to convert the labels to a single format.


In [None]:
from pathlib import Path
from tqdm import tqdm

# ================= CONFIGURATION =================
# NOTE: every `...` below is a placeholder and must be replaced with a real
#       value before the remapping cell is run.

# 1. Path to your OLD labels (The folder containing .txt files)
#    It can contain subfolders (e.g., 'train', 'val')
INPUT_DIR = ...

# 2. Path where the NEW, FIXED labels will be saved
#    (the sub-folder structure of INPUT_DIR is mirrored here)
OUTPUT_DIR = ...

# 3. THE MAPPING DICTIONARY (CRITICAL STEP)
#    Format: { OLD_ID : NEW_ID }
#    Look at your OLD data.yaml to find the OLD_IDs.
#    Look at your NEW Master List to find the NEW_IDs.
#    Example: {0: 0, 1: 1, 2: 2, 3: 5, 4: 3, 5: 4}
#    OLD_IDs are compared as integers. Label lines whose class ID is not a
#    key of this dictionary are dropped from the output.
ID_MAPPING = { ... }

In [None]:
# Convert strings to Path objects
input_path = Path(INPUT_DIR)
output_path = Path(OUTPUT_DIR)


def find_class_id(parts, id_mapping):
    """Return the class ID of one tokenised label line, or None if there is none.

    Args:
        parts: Whitespace-separated tokens of a single label line.
        id_mapping: The {OLD_ID: NEW_ID} dictionary.

    Returns:
        The class ID as an int, or None when no class ID can be determined.

    A regular 5-token line ("class x y w h") carries its class ID in the first
    token. For any other token count, the tokens in front of the last four
    (the coordinates) are searched for one that is exactly an OLD_ID of
    `id_mapping`. Only whole tokens are compared: a substring test would let
    the ID "0" match inside a coordinate such as "0.5".
    """
    if len(parts) == 5:
        try:
            return int(parts[0])
        except ValueError:
            return None

    known_ids = {str(old_id) for old_id in id_mapping}
    for token in parts[:-4]:
        if token in known_ids:
            return int(token)
    return None


# Find all .txt files recursively
files = list(input_path.rglob("*.txt"))

print(f"Found {len(files)} label files to process.")
print(f"Mapping rules: {ID_MAPPING}")

if not files:
    raise FileNotFoundError("❌ No .txt files found! Check your INPUT_DIR.")

# Process files
succ_count = 0
unparsed_count = 0  # lines for which no class ID could be determined
for file_path in tqdm(files, desc="Remapping Labels"):

    # Read the old file
    with open(file_path, 'r') as f:
        lines = f.readlines()

    new_lines = []

    for line in lines:
        parts = line.split()

        # Skip empty lines
        if not parts:
            continue

        # The class ID is resolved afresh for every line, so a line without a
        # recognisable ID can never inherit the ID of the previous line.
        class_id = find_class_id(parts, ID_MAPPING)
        if class_id is None:
            unparsed_count += 1
            continue

        # Classes missing from ID_MAPPING are dropped on purpose. To keep them
        # unchanged instead, append `line` to new_lines in an else-branch.
        if class_id in ID_MAPPING:
            coords = parts[-4:]  # x, y, w, h
            new_lines.append(f"{ID_MAPPING[class_id]} {' '.join(coords)}\n")

    # Calculate new file path to mirror structure
    # e.g., input/train/file.txt -> output/train/file.txt
    relative_path = file_path.relative_to(input_path)
    save_path = output_path / relative_path

    # Create subdirectories if they don't exist
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Write the new file (files left without any label line are not written)
    if new_lines:
        with open(save_path, 'w') as f:
            f.writelines(new_lines)
        succ_count += 1

if unparsed_count:
    print(f"⚠️  Skipped {unparsed_count} line(s) with no recognisable class ID.")

print(f"\n✅ Done! Fixed labels ({succ_count} out of {len(files)}) are located in: {OUTPUT_DIR}.")
print("⚠️  Remember to update your dataset.yaml to point to this new folder!")

Found 277 label files to process.
Mapping rules: {0: 0, 1: 1, 2: 2, 3: 5, 4: 3, 5: 4}


Remapping Labels: 100%|██████████| 277/277 [00:00<00:00, 1658.83it/s]


✅ Done! Fixed labels (277 out of 277) are located in: E:\projects\_full_fledge\Kitchen-To-Ol\resources\annotation\Knife-2\labels_fixed.
⚠️  Remember to update your dataset.yaml to point to this new folder!





### Check img and txt


- Check compatibility between img and txt
- A utility for changing a class according to its name


In [None]:
### Get dir_a and dir_b, get only the file names that are in both dirs
#       - Files in dir_a are .txt files
#       - Files in dir_b are image files
#       E.g. "img_01.jpg" and "img_01.txt" --> Kept
#       E.g. "img_02.jpg" but no "img_02.txt" --> Deleted

import os

def run(dir_a, dir_b):
    """Delete labels without a matching image and images without a matching label.

    A label in `dir_a` and an image in `dir_b` match when they share the same
    file name without its extension (e.g. "img_01.txt" and "img_01.jpg").
    Extensions are compared case-insensitively, and files are deleted under
    the exact name they have on disk, so "photo.JPG" is handled the same way
    as "photo.jpg" on case-sensitive file systems.

    Args:
        dir_a: Directory containing the ".txt" label files.
        dir_b: Directory containing the image files (.jpg, .jpeg, .png).

    Returns:
        None. The number of files deleted from each directory is printed.

    Warning:
        Unmatched files are permanently removed with os.remove.
    """
    image_exts = {".jpg", ".jpeg", ".png"}

    def collect(directory, wanted_exts):
        # Map each base name to the real file names that carry it.
        found = {}
        for fname in os.listdir(directory):
            stem, ext = os.path.splitext(fname)
            if ext.lower() in wanted_exts:
                found.setdefault(stem, []).append(fname)
        return found

    def delete_unmatched(directory, candidates, stems_to_keep):
        # Remove every candidate file whose base name has no counterpart.
        removed = 0
        for stem, fnames in candidates.items():
            if stem in stems_to_keep:
                continue
            for fname in fnames:
                path = os.path.join(directory, fname)
                if os.path.exists(path):
                    os.remove(path)
                    removed += 1
        return removed

    labels = collect(dir_a, {".txt"})
    images = collect(dir_b, image_exts)

    count_a = delete_unmatched(dir_a, labels, images.keys())
    count_b = delete_unmatched(dir_b, images, labels.keys())

    print("Deleted from dir_a:", count_a)
    print("Deleted from dir_b:", count_b)

# Placeholders: replace `...` with the label directory (dir_a) and the image
# directory (dir_b) before running this cell.
# WARNING: run() permanently deletes the unmatched files from both directories.
dir_a = ...
dir_b = ...
run(dir_a, dir_b)

Deleted from dir_a: 0
Deleted from dir_b: 632


In [None]:
### Update name of images from this format, to another format
#       E.g. "cái tô_001", "cái tô_002" --> "to_001", "to_002"
import os
import unicodedata

# Placeholder: replace `...` with the folder whose files should be renamed.
SOURCE_FOLDER = ...

# Accented characters can be stored composed (NFC) or decomposed (NFD)
# depending on where the files came from. Both sides are normalised to NFC so
# the prefix matches regardless of how the file name is encoded on disk.
OLD_PREFIX = unicodedata.normalize("NFC", "cái tô_")
NEW_PREFIX = "to_"

for filename in os.listdir(SOURCE_FOLDER):
    normalized_name = unicodedata.normalize("NFC", filename)
    if not normalized_name.startswith(OLD_PREFIX):
        continue

    new_name = NEW_PREFIX + normalized_name[len(OLD_PREFIX):]
    # The source path uses the name exactly as it is stored on disk.
    source_path = os.path.join(SOURCE_FOLDER, filename)
    target_path = os.path.join(SOURCE_FOLDER, new_name)

    # os.rename may silently replace an existing file, so never rename onto one.
    if os.path.exists(target_path):
        print(f"[WARN] {new_name} already exists. Skipped {filename}.")
        continue

    os.rename(source_path, target_path)

In [None]:
### Change all files in image and label dirs into one format.
import os
import shutil

# Placeholders: replace `...` with the image and label directories before
# running this cell.
IMAGE_DIR = ...
LABEL_DIR = ...

# Extensions considered image files (compared against the lower-cased extension)
IMAGE_EXTS = {".jpg", ".jpeg", ".png"}

# Naming template for the new base name (without extension). "{:04d}" is
# replaced by a zero-padded running counter, e.g. last_knife_1412_0001
NAME_TEMPLATE = "last_knife_1412_{:04d}"

def main():
    """Rename every image/label pair to NAME_TEMPLATE with a running counter.

    Images in IMAGE_DIR (extensions in IMAGE_EXTS, case-insensitive) are
    processed in sorted order. An image whose "<name>.txt" label is missing
    from LABEL_DIR is skipped with a warning and does not consume a counter
    value. The image keeps its extension (lower-cased); the label gets the
    same new base name with ".txt".

    The renaming is done in three steps so that no file is ever overwritten:
    all renames are planned first, the plan is checked against files that are
    not part of it, and the files are then moved through temporary names.
    Renaming directly would destroy data when a file that still has to be
    processed already carries one of the target names.

    Raises:
        FileExistsError: A target name is already used by a file that is not
            itself being renamed. Nothing has been moved at that point.
    """
    import uuid  # local import: only needed for the temporary staging names

    image_files = sorted(
        f for f in os.listdir(IMAGE_DIR)
        if os.path.splitext(f)[1].lower() in IMAGE_EXTS
    )

    # ---- Step 1: plan the renames without touching any file ----
    pairs = []  # (img_file, new_img_name, label_file, new_label_name)
    claimed_labels = set()
    counter = 1
    for img_file in image_files:
        img_name, img_ext = os.path.splitext(img_file)
        label_file = img_name + ".txt"

        # A label can belong to one image only; a second image with the same
        # base name (e.g. "a.jpg" and "a.png") is treated as unlabelled.
        label_missing = not os.path.exists(os.path.join(LABEL_DIR, label_file))
        if label_missing or label_file in claimed_labels:
            print(f"[WARN] Missing label for {img_file}. Skipped.")
            continue
        claimed_labels.add(label_file)

        new_base = NAME_TEMPLATE.format(counter)
        pairs.append((img_file, new_base + img_ext.lower(), label_file, new_base + ".txt"))
        counter += 1

    moves = []  # (directory, old_name, new_name)
    for img_file, new_img_name, label_file, new_label_name in pairs:
        moves.append((IMAGE_DIR, img_file, new_img_name))
        moves.append((LABEL_DIR, label_file, new_label_name))

    def path_key(directory, name):
        # Comparable form of a path (case-folded where the platform does so).
        return os.path.normcase(os.path.abspath(os.path.join(directory, name)))

    # ---- Step 2: refuse to overwrite files that are not part of the plan ----
    planned_sources = {path_key(directory, old_name) for directory, old_name, _ in moves}
    for directory, _, new_name in moves:
        target = os.path.join(directory, new_name)
        if os.path.exists(target) and path_key(directory, new_name) not in planned_sources:
            raise FileExistsError(
                f"Target {target} already exists and is not part of this renaming."
            )

    # ---- Step 3: move through unique temporary names, then to final names ----
    # NOTE: if the run is interrupted here, files may be left under their
    # ".renaming-*" temporary names.
    token = uuid.uuid4().hex
    staged = []  # (directory, temp_name, new_name)
    for index, (directory, old_name, new_name) in enumerate(moves):
        if old_name == new_name:
            continue  # already carries its final name
        temp_name = f".renaming-{token}-{index}"
        shutil.move(os.path.join(directory, old_name), os.path.join(directory, temp_name))
        staged.append((directory, temp_name, new_name))

    for directory, temp_name, new_name in staged:
        shutil.move(os.path.join(directory, temp_name), os.path.join(directory, new_name))

    for img_file, new_img_name, label_file, new_label_name in pairs:
        print(f"Renamed: {img_file} -> {new_img_name}")
        print(f"         {label_file} -> {new_label_name}")


# Run the renaming only when this code is executed directly (as a notebook
# cell or script, where __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()