# MergeDataset to Alldata

* Maicon/datasets/alldata: 라벨링한 모든 데이터를 보관하는 디렉토리
  * Maicon/datasets/alldata/images: 모든 이미지
  * Maicon/datasets/alldata/labels: 모든 라벨

In [1]:
import os
import shutil

def _copy_new_files(src_dir, dst_dir, kind):
    """Copy every regular file in ``src_dir`` into ``dst_dir`` without overwriting.

    Parameters:
        src_dir (str): Directory whose direct entries are examined.
        dst_dir (str): Existing directory that receives the copies.
        kind (str): Word used in the progress messages ('image' or 'label').
    """
    for file_name in os.listdir(src_dir):
        src_file = os.path.join(src_dir, file_name)
        dest_file = os.path.join(dst_dir, file_name)

        if not os.path.isfile(src_file):
            # Sub-directories and other non-file entries are never copied;
            # report the real reason instead of claiming the target exists.
            print(f"Skipped {kind} (not a regular file): {src_file}")
        elif os.path.exists(dest_file):
            # Never overwrite: a same-named file already in the destination wins.
            print(f"Skipped {kind} (already exists): {dest_file}")
        else:
            shutil.copy2(src_file, dest_file)
            print(f"Copied {kind}: {src_file} to {dest_file}")


def copy_images_and_labels(source_dir, dest_dir):
    """Merge the 'images' and 'labels' folders of one dataset into another.

    Files directly inside ``source_dir/images`` and ``source_dir/labels`` are
    copied with ``shutil.copy2`` into ``dest_dir/images`` and
    ``dest_dir/labels``. Existing destination files are left untouched, and
    entries that are not regular files are skipped. One progress line is
    printed per entry.

    Parameters:
        source_dir (str): Dataset directory containing 'images' and 'labels'.
        dest_dir (str): Target dataset directory; its 'images' and 'labels'
            sub-directories are created when missing.

    Raises:
        FileNotFoundError: If ``source_dir/images`` or ``source_dir/labels``
            does not exist (raised by ``os.listdir``).
    """
    # Define destination paths for images and labels
    image_dest = os.path.join(dest_dir, 'images')
    label_dest = os.path.join(dest_dir, 'labels')

    # Ensure destination directories exist before anything is copied
    os.makedirs(image_dest, exist_ok=True)
    os.makedirs(label_dest, exist_ok=True)

    # Images first, then labels
    _copy_new_files(os.path.join(source_dir, 'images'), image_dest, 'image')
    _copy_new_files(os.path.join(source_dir, 'labels'), label_dest, 'label')

In [None]:
# Merge the images/labels found under roboflow_project/train into all_data.
# Files that already exist in all_data are not overwritten.
copy_images_and_labels(
    source_dir='/Users/jjookim/Projects/AIForce/datasets/roboflow_project/train',
    dest_dir='/Users/jjookim/Projects/AIForce/datasets/all_data',
)

# 태스크를 두 개로 나누었을 때 필요한 함수들

In [None]:


import shutil
import os

source_dir = '/content/drive/MyDrive/Maicon/datasets/alldata/valid_main'
dest_dir = '/content/drive/MyDrive/Maicon/datasets/alldata/valid_main_onlyenemtank'

# Duplicate the whole valid_main tree once. If the target is already present,
# leave it untouched and only report the situation.
if os.path.exists(dest_dir):
    print("Destination directory already exists. Choose a new location or remove it first.")
else:
    shutil.copytree(source_dir, dest_dir)

In [None]:


import shutil
import os

source_dir = '/content/drive/MyDrive/Maicon/datasets/alldata/test_main'
dest_dir = '/content/drive/MyDrive/Maicon/datasets/alldata/test_main_onlyenemtank'

# Duplicate the whole test_main tree once. If the target is already present,
# leave it untouched and only report the situation.
if os.path.exists(dest_dir):
    print("Destination directory already exists. Choose a new location or remove it first.")
else:
    shutil.copytree(source_dir, dest_dir)

In [None]:
import os

def filter_and_remap_labels(label_dir, allowed_labels):
    """
    Filters and remaps labels in each ".txt" file within the given directory,
    keeping only lines whose first field is one of the allowed label indices.
    Kept labels are renumbered from 0 in ascending order of the original index.

    Each file is rewritten IN PLACE. Running the function a second time on the
    same directory is not idempotent: the labels have already been renumbered,
    so they are compared against ``allowed_labels`` again using their new
    values (e.g. with ``{2}``, class 2 becomes 0 and a second run drops it).

    Blank or whitespace-only lines are skipped rather than raising IndexError.

    Parameters:
        label_dir (str): Path to the directory containing label files.
        allowed_labels (set): Set of allowed label indices (e.g., {0, 2}).

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    # Ensure the directory exists
    if not os.path.isdir(label_dir):
        print(f"Directory {label_dir} does not exist.")
        return

    # Create a mapping for allowed labels to a continuous range starting from 0
    label_mapping = {original: new for new, original in enumerate(sorted(allowed_labels))}

    # Iterate over each label file in the directory
    for filename in os.listdir(label_dir):
        if not filename.endswith(".txt"):
            continue
        file_path = os.path.join(label_dir, filename)

        # Read the whole file first so it can safely be overwritten afterwards
        with open(file_path, 'r') as file:
            lines = file.readlines()

        # Filter and remap labels
        filtered_lines = []
        for line in lines:
            parts = line.split()
            if not parts:
                # Empty line (e.g. a trailing newline): nothing to keep
                continue

            label = int(parts[0])
            if label in label_mapping:
                remapped_label = label_mapping[label]
                # Construct the new line with the remapped label
                new_line = f"{remapped_label} " + " ".join(parts[1:]) + "\n"
                filtered_lines.append(new_line)

        # Write the filtered and remapped content back to the file
        with open(file_path, 'w') as file:
            file.writelines(filtered_lines)

        print(f"Processed {filename}: retained {len(filtered_lines)} lines.")


In [None]:
# Rewrite the test_main_onlyenemtank label files in place: only lines with
# class index 2 are kept, and that class is renumbered to 0.
allowed_labels = {2}
label_directory = "/content/drive/MyDrive/Maicon/datasets/alldata/test_main_onlyenemtank/labels"
filter_and_remap_labels(label_dir=label_directory, allowed_labels=allowed_labels)


Processed a2_e0_at2_et0_b1_o1 (2).txt: retained 0 lines.
Processed a2_e0_at2_et0_b1_o1 (6).txt: retained 0 lines.
Processed a2_e0_at2_et0_b1_o1 (1).txt: retained 0 lines.
Processed a2_e0_at2_et0_b1_o1 (3).txt: retained 0 lines.
Processed a2_e0_at2_et0_b1_o1 (4).txt: retained 0 lines.
Processed a2_e0_at2_et0_b1_o1 (5).txt: retained 0 lines.
Processed a0_e2_at0_et2_b1_o1 (2).txt: retained 2 lines.
Processed a0_e2_at0_et2_b1_o1 (3).txt: retained 2 lines.
Processed a0_e2_at0_et2_b1_o1 (1).txt: retained 2 lines.
Processed a0_e2_at0_et2_b1_o1 (4).txt: retained 2 lines.
Processed a0_e2_at0_et2_b1_o1 (5).txt: retained 2 lines.
Processed a2_e2_at2_et2_b1_o1 (4).txt: retained 2 lines.
Processed a2_e2_at2_et2_b1_o1 (5).txt: retained 2 lines.
Processed a2_e2_at2_et2_b1_o1 (3).txt: retained 2 lines.
Processed a2_e2_at2_et2_b1_o1 (2).txt: retained 2 lines.
Processed a2_e2_at2_et2_b1_o1 (1).txt: retained 2 lines.
Processed diorama (16).txt: retained 2 lines.
Processed diorama (5).txt: retained 0 line

In [None]:
# Rewrite the valid_main_onlyenemtank label files in place: only lines with
# class index 2 are kept, and that class is renumbered to 0.
allowed_labels = {2}
label_directory = "/content/drive/MyDrive/Maicon/datasets/alldata/valid_main_onlyenemtank/labels"
filter_and_remap_labels(label_dir=label_directory, allowed_labels=allowed_labels)


Processed a1_e0_at0_et0_b0_o0-21-_jpg.rf.b41963f034073f3a5c683c5b58bf8c6e.txt: retained 0 lines.
Processed a1_e0_at0_et0_b0_o0-24-_jpg.rf.f651819df75649cb10a81f211da2706d.txt: retained 0 lines.
Processed a1_e0_at0_et0_b0_o0-37-_jpg.rf.2a7224a768b8cb988d5c39fe86632da9.txt: retained 0 lines.
Processed a1_e0_at0_et0_b0_o0-40-_jpg.rf.22a783dc25867b8752a2032b410be6b7.txt: retained 0 lines.
Processed a1_e0_at0_et0_b0_o0-20-_jpg.rf.81afe4af21910e29e000656d0ca9697d.txt: retained 0 lines.
Processed a1_e0_at0_et0_b0_o0-34-_jpg.rf.070946cfa2186f24f9c8f0f91d741797.txt: retained 0 lines.
Processed a1_e0_at0_et0_b0_o0-11-_jpg.rf.31259056b238205a4dce6dbdf4793c34.txt: retained 0 lines.
Processed a1_e0_at0_et0_b0_o0-19-_jpg.rf.ca9ee6197c003e74b729532094fb1b06.txt: retained 0 lines.
Processed a0_e1_at0_et0_b0_o0-32-_jpg.rf.d051d2da2c9c5d5ad3e708f633e656e3.txt: retained 0 lines.
Processed a0_e1_at0_et0_b0_o0-51-_jpg.rf.ed7039c547913fb1158eab589ea2068c.txt: retained 0 lines.
Processed a0_e1_at0_et0_b0_o0-

In [None]:
# Rewrite the train_v6_onlyenemtank label files in place: only lines with
# class index 2 are kept, and that class is renumbered to 0.
allowed_labels = {2}
label_directory = "/content/drive/MyDrive/Maicon/datasets/alldata/train_v6_onlyenemtank/labels"
filter_and_remap_labels(label_dir=label_directory, allowed_labels=allowed_labels)


Processed a0_e0_at0_et1_b0_o0-55-_jpg.rf.887accc56f930740573f5473a33bd9c2.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-24-_jpg.rf.e8d8467de2c523db1999496b37ed2ff3.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-3-_jpg.rf.5a8d1b8f8638c766da056f838f4acf97.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-20-_jpg.rf.18c3cda90f8fcd2083c448bf8e2277ca.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-15-_jpg.rf.d23be9c3d827b125fc7a6459087ec987.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-63-_jpg.rf.f498dbf63654a9e429ec65af1e35a318.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-19-_jpg.rf.c9c2bf0a613af131312236c31f32f491.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-52-_jpg.rf.12ac887bc21c147f955f77395135e822.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-33-_jpg.rf.65345881cb218e248b0d475c24b1446d.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-64-_jpg.rf.2883535a5d8a1a78cf290e887274268d.txt: retained 1 lines.
Processed a0_e0_at0_et1_b0_o0-1