In [13]:
import os


def list_all_files_in_directory(directory_path):
    """Return the names of all files found under ``directory_path``, recursively.

    Only bare file names are returned (e.g. ``"0001.png"``), not full paths,
    so files with the same name in different sub-directories appear as
    duplicate entries.

    The result is ordered deterministically: directories are visited in
    sorted order and the file names inside each directory are sorted. Raw
    ``os.walk`` order depends on the operating system / file system, which
    would make any seeded shuffle of the result irreproducible across machines.

    Args:
        directory_path: Directory to scan; relative paths are resolved
            against the current working directory.

    Returns:
        list[str]: File names. Empty if the directory has no files or does
        not exist (``os.walk`` ignores listing errors by default).
    """
    root_dir = os.path.abspath(directory_path)
    file_names = []
    for _, dir_names, names in os.walk(root_dir):
        # Sorting in place steers the (top-down) traversal order of os.walk.
        dir_names.sort()
        file_names.extend(sorted(names))

    return file_names

In [14]:
def extract_filename_without_extension(file_path):
    """Strip the directory part and the last extension from ``file_path``.

    Only the final suffix is removed, so ``"a/b/x.tar.gz"`` becomes
    ``"x.tar"``. Names without an extension are returned unchanged.
    """
    _, file_name = os.path.split(file_path)
    stem, _extension = os.path.splitext(file_name)
    return stem

In [None]:
import random

# Root directory that holds one sub-directory per dataset.
dataset_root_path = './datasets/dolos_data/celebahq/fake/'

# Dataset whose image listing defines the file stems of every split.
REFERENCE_DATASET = "lama"
# Seed for the shuffles below, so the same subset is selected on every run.
SHUFFLE_SEED = 42
# Number of files kept per split after shuffling; "test" is kept in full.
SUBSET_SIZES = {"train": 3000, "valid": 300}

# Extension-less file names per split, taken from the reference dataset.
all_files = {
    split_name: [
        extract_filename_without_extension(f)
        for f in list_all_files_in_directory(
            os.path.join(dataset_root_path, f"{REFERENCE_DATASET}/images/{split_name}")
        )
    ]
    for split_name in ("train", "test", "valid")
}

print("Initial files:")
for split_name, split_files in all_files.items():
    print(f"{split_name} -->", len(split_files))

random.seed(SHUFFLE_SEED)

# Shuffle every split in place, in dict order (train, test, valid), so the
# sequence of random draws is the same on every run.
for split_files in all_files.values():
    random.shuffle(split_files)

# Keep only the first N shuffled entries of the sub-sampled splits.
for split_name, subset_size in SUBSET_SIZES.items():
    all_files[split_name] = all_files[split_name][:subset_size]

print()
print("Randomized files:")
for split_name, split_files in all_files.items():
    print(f"{split_name} -->", len(split_files))

Initial files:
train --> 9000
test --> 900
valid --> 900

Randomized files:
train --> 3000
test --> 900
valid --> 300


In [16]:
# Directory (relative to the working directory) that receives the merged dataset.
result_dir = "train_all_4_datasets"

# Dataset sub-directory names merged into the train/valid splits.
train = [
    "ldm",
    "repaint-p2-9k",
    "lama",
    "pluralistic",
]
# The test split draws from the same datasets; kept as an independent list
# so either selection can be edited without affecting the other.
test = list(train)

In [17]:
# Create the output layout:
#   <result_dir>/{images,masks}/{test,train,valid}
# exist_ok=True keeps this cell re-runnable (e.g. after Restart & Run All);
# without it, os.makedirs raises FileExistsError once the directories exist.
# NOTE: files left over from a previous run are NOT removed here.
os.makedirs(result_dir, exist_ok=True)

images_dir = os.path.join(result_dir, "images")
os.makedirs(images_dir, exist_ok=True)

masks_dir = os.path.join(result_dir, "masks")
os.makedirs(masks_dir, exist_ok=True)

# `split_name` rather than `dir`, so the builtin dir() is not shadowed in the kernel.
for split_name in ["test", "train", "valid"]:
    os.makedirs(os.path.join(images_dir, split_name), exist_ok=True)
    os.makedirs(os.path.join(masks_dir, split_name), exist_ok=True)

In [18]:
import shutil

# Datasets that contribute to each split of the merged dataset:
# train and valid both use the `train` selection, test uses `test`.
datasets_per_split = {"train": train, "valid": train, "test": test}

# Copy every selected image and mask into the merged layout, suffixing the
# file name with its source dataset so that equal stems from different
# datasets do not overwrite each other:
#   <root>/<dataset>/<kind>/<split>/<stem>.png
#       -> <result_dir>/<kind>/<split>/<stem>_<dataset>.png
# The copy goes straight to the final name (no copy-then-rename), so no
# transient "<stem>.png" appears in the destination and a re-run simply
# overwrites existing files.
# Loop variables avoid the names `type` and `set`, which would shadow the
# builtins for the rest of the kernel session.
# Assumes every dataset contains the stems listed in `all_files`; a missing
# source file makes shutil.copy raise FileNotFoundError.
for split_name, dataset_names in datasets_per_split.items():
    for dataset_name in dataset_names:
        for content_kind in ("images", "masks"):
            destination_dir = os.path.join(result_dir, content_kind, split_name)
            for file_stem in all_files[split_name]:
                source_path = os.path.join(
                    dataset_root_path,
                    dataset_name,
                    content_kind,
                    split_name,
                    f"{file_stem}.png",
                )
                destination_path = os.path.join(
                    destination_dir, f"{file_stem}_{dataset_name}.png"
                )
                shutil.copy(source_path, destination_path)

In [20]:
# Sanity check: number of files per output directory, one count per line, in
# the order images/train, masks/train, images/test, masks/test, images/valid,
# masks/valid. Image and mask counts of the same split should match.
print(
    *(
        len(list_all_files_in_directory(f"{result_dir}/{kind}/{split}"))
        for split in ("train", "test", "valid")
        for kind in ("images", "masks")
    ),
    sep="\n",
)

12000
12000
3600
3600
1200
1200
