In [None]:
import os
import shutil

# Base directories for images and labels, each with a 'tmp' staging folder
images_dir = '/content/drive/MyDrive/Colab Notebooks/2024_CV_project/dataset/images'
labels_dir = '/content/drive/MyDrive/Colab Notebooks/2024_CV_project/dataset/labels'
images_tmp_dir, labels_tmp_dir = (
    os.path.join(base_dir, 'tmp') for base_dir in (images_dir, labels_dir)
)

# Make sure both staging folders are present (no error if they already exist)
os.makedirs(images_tmp_dir, exist_ok=True)
os.makedirs(labels_tmp_dir, exist_ok=True)

# The train/val folders whose contents will be moved into tmp
image_folders_to_move = [os.path.join(images_dir, split) for split in ('train', 'val')]
label_folders_to_move = [os.path.join(labels_dir, split) for split in ('train', 'val')]

# Function to move files into tmp directory (with overwrite)
def move_files_to_tmp(folders, tmp_dir):
    """
    Move every file found under the given folders into tmp_dir (flattened).

    Each folder is walked recursively and every file is placed directly in
    tmp_dir under its base name, so sub-directory structure is not kept.
    An existing destination file with the same name is overwritten, which
    also means that source files sharing a base name collapse into one
    (the last one moved wins).

    Files that already sit directly in tmp_dir are left untouched. This
    matters when tmp_dir is one of, or lies inside, the walked folders:
    source and destination are then the same path, and removing the
    "existing destination" first would delete the file itself.

    Args:
        folders: Iterable of directory paths to collect files from.
            Paths that do not exist yield nothing from os.walk and are
            therefore skipped silently.
        tmp_dir: Destination directory; created if it does not exist.
    """
    os.makedirs(tmp_dir, exist_ok=True)
    tmp_real = os.path.realpath(tmp_dir)

    for folder in folders:
        for root, _, files in os.walk(folder):
            # Files in tmp_dir itself are already where they belong.
            if os.path.realpath(root) == tmp_real:
                continue

            for file in files:
                src_path = os.path.join(root, file)
                dst_path = os.path.join(tmp_dir, file)

                # Overwrite: drop a stale destination file if there is one.
                try:
                    os.remove(dst_path)
                except FileNotFoundError:
                    pass

                shutil.move(src_path, dst_path)

# Empty train/val into tmp: images first (images/* -> images/tmp),
# then labels (labels/* -> labels/tmp)
for _source_folders, _staging_dir in (
    (image_folders_to_move, images_tmp_dir),
    (label_folders_to_move, labels_tmp_dir),
):
    move_files_to_tmp(_source_folders, _staging_dir)

print("All files from train and val folders have been moved to the tmp folder.")


train과 val 폴더의 모든 파일이 tmp 폴더로 이동되었습니다.


In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Base directories for images and labels, plus their 'tmp' staging folders
images_dir = '/content/drive/MyDrive/Colab Notebooks/2024_CV_project/dataset/images'
labels_dir = '/content/drive/MyDrive/Colab Notebooks/2024_CV_project/dataset/labels'
images_tmp_dir = os.path.join(images_dir, 'tmp')
labels_tmp_dir = os.path.join(labels_dir, 'tmp')

# Ensure train/ and val/ exist under images first, then under labels
for _base_dir in (images_dir, labels_dir):
    for _split in ('train', 'val'):
        os.makedirs(os.path.join(_base_dir, _split), exist_ok=True)

# Load all files from tmp folder.
# os.listdir returns entries in arbitrary order, so the listings are sorted:
# without a stable input order the seeded split below is not reproducible
# from one run (or one filesystem) to the next.
image_files = sorted(file for file in os.listdir(images_tmp_dir) if file.endswith('.jpg'))
label_files = sorted(file for file in os.listdir(labels_tmp_dir) if file.endswith('.txt'))

# Separate labeled and unlabeled images.
# Sets make each membership test O(1) instead of scanning a list per image.
# NOTE: str.replace swaps every '.jpg' occurrence in a name, not only the
# extension; the label lists built further down in this cell use the same
# mapping, so the two must stay in sync if either is changed.
_label_name_set = set(label_files)
labeled_images = [img for img in image_files if img.replace('.jpg', '.txt') in _label_name_set]
_labeled_name_set = set(labeled_images)
unlabeled_images = [img for img in image_files if img not in _labeled_name_set]

# Split labeled data into train and val (80:20); random_state fixes the shuffle
train_images, val_images = train_test_split(labeled_images, test_size=0.2, random_state=42)

# Move files to target dataset folders (with overwrite)
def move_files_to_set(files, src_dir, target_dir):
    """
    Move the named files from src_dir to target_dir, overwriting existing ones.

    The source is checked before the destination is touched, so a missing
    source can never cause an existing destination file to be deleted.
    When source and destination resolve to the same path the file is left
    as it is, because removing the "existing destination" would delete it.

    Args:
        files: Iterable of file names (relative to src_dir) to move.
        src_dir: Directory the files currently live in.
        target_dir: Directory to move them to; created if it does not exist.

    Raises:
        FileNotFoundError: If a listed file is not present in src_dir.
            Files earlier in the list have already been moved at that point.
    """
    os.makedirs(target_dir, exist_ok=True)

    for file in files:
        src_path = os.path.join(src_dir, file)
        dst_path = os.path.join(target_dir, file)

        # Validate the source first; never delete the target for nothing.
        if not os.path.lexists(src_path):
            raise FileNotFoundError(f"Source file not found: {src_path}")

        # Moving a file onto itself is a no-op, not a delete.
        if os.path.realpath(src_path) == os.path.realpath(dst_path):
            continue

        # If target exists, remove before moving
        try:
            os.remove(dst_path)
        except FileNotFoundError:
            pass

        shutil.move(src_path, dst_path)

# Move train and val images out of tmp into their split folders
move_files_to_set(train_images, images_tmp_dir, os.path.join(images_dir, 'train'))
move_files_to_set(val_images, images_tmp_dir, os.path.join(images_dir, 'val'))

# Move corresponding label files (same base name, '.jpg' swapped for '.txt')
train_labels = [img.replace('.jpg', '.txt') for img in train_images]
val_labels = [img.replace('.jpg', '.txt') for img in val_images]

move_files_to_set(train_labels, labels_tmp_dir, os.path.join(labels_dir, 'train'))
move_files_to_set(val_labels, labels_tmp_dir, os.path.join(labels_dir, 'val'))

# Remove any leftover unused label files from tmp.
# Real sub-directories are skipped: os.remove raises on a directory (for
# example a notebook checkpoint folder), which would abort the cell before
# the summary below is printed.
for file in os.listdir(labels_tmp_dir):
    leftover_path = os.path.join(labels_tmp_dir, file)
    if os.path.isdir(leftover_path) and not os.path.islink(leftover_path):
        continue
    os.remove(leftover_path)

# Unlabeled images are not moved by this cell, so they stay in images/tmp
print("Dataset split completed!")
print(f"Train images: {len(train_images)}, Val images: {len(val_images)}, Remaining unlabeled images: {len(unlabeled_images)}")


데이터 스플릿 완료!
Train 이미지: 797, Val 이미지: 200, Tmp에 남은 이미지: 3589
