In [1]:
import os
import shutil
import random
from PIL import Image

#### Resize the original ps-battles images to 224x224

In [None]:
# Source directory of the original images and destination for the resized copies.
input_folder = "ps-battles/originals"
output_folder = "ps-battles-proc/originals"

os.makedirs(output_folder, exist_ok=True)

# Every image is resized to exactly this (width, height).
target_size = (224, 224)

for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    img_path = os.path.join(input_folder, filename)

    # Open inside a context manager so the source file handle is released
    # deterministically; convert() hands back an independent in-memory image.
    try:
        with Image.open(img_path) as src_img:
            img = src_img.convert("RGB")
    except Exception:
        # Report unreadable/corrupt files and keep going instead of aborting
        # the whole batch. KeyboardInterrupt is deliberately not caught.
        print("Error opening image", img_path)
        continue

    img_resized = img.resize(target_size, Image.LANCZOS)

    # splitext strips only the final extension, so "a.b.png" becomes "a.b.jpg"
    # rather than collapsing to "a.jpg" and overwriting an unrelated file.
    save_path = os.path.join(output_folder, os.path.splitext(filename)[0] + ".jpg")
    img_resized.save(save_path, "JPEG", quality=85)

print("✅ Resizing complete! Images saved to:", output_folder)

✅ Resizing complete! Images saved to: ps-battles-proc/originals


#### Resize photoshopped ps-battles images to 224x224

In [None]:
# Source directory (one subfolder per original image) and destination root.
input_folder = "ps-battles/photoshops"
output_folder = "ps-battles-proc/photoshops"

os.makedirs(output_folder, exist_ok=True)

# Every image is resized to exactly this (width, height).
target_size = (224, 224)

for subfolder in os.listdir(input_folder):
    subfolder_path = os.path.join(input_folder, subfolder)

    # Stray files at the top level (e.g. .DS_Store) are not image folders;
    # skipping them avoids creating a bogus output directory and then
    # crashing in os.listdir().
    if not os.path.isdir(subfolder_path):
        continue

    output_subfolder = os.path.join(output_folder, subfolder)
    os.makedirs(output_subfolder, exist_ok=True)

    for filename in os.listdir(subfolder_path):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        # splitext strips only the final extension, so names containing extra
        # dots keep their full stem and cannot collide with each other.
        save_path = os.path.join(output_subfolder, os.path.splitext(filename)[0] + ".jpg")

        # Already converted on a previous run: skip so the cell is resumable.
        if os.path.exists(save_path):
            continue

        img_path = os.path.join(subfolder_path, filename)

        # Open inside a context manager so the source file handle is released
        # deterministically; convert() hands back an independent in-memory image.
        try:
            with Image.open(img_path) as src_img:
                img = src_img.convert("RGB")
        except Exception:
            # Best effort: report the unreadable file and continue. Catching
            # Exception (not a bare except) lets Ctrl-C still stop the loop.
            print("Error opening image", img_path)
            continue

        img_resized = img.resize(target_size, Image.LANCZOS)
        img_resized.save(save_path, "JPEG", quality=85)

print("✅ Resizing complete! Images saved to:", output_folder)

Error opening image ps-battles/photoshops/3atzz7/csgb1c5_0.jpg




✅ Resizing complete! Images saved to: ps-battles-proc/photoshops


#### Change the structure of ps-battles folders

In [23]:
def _unique_destination(dest_dir, name):
    """Return a path for ``name`` inside ``dest_dir`` that does not exist yet."""
    candidate = os.path.join(dest_dir, name)
    stem, ext = os.path.splitext(name)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(dest_dir, "{}_{}{}".format(stem, counter, ext))
        counter += 1
    return candidate


def split_and_move(files, train_dest, test_dest, split_ratio=0.8):
    """Randomly split ``files`` and move them into train/test directories.

    Args:
        files: Paths of the files to move. The caller's list is not modified.
        train_dest: Directory receiving the first ``split_ratio`` share of
            the shuffled files. Created if missing.
        test_dest: Directory receiving the remaining files. Created if missing.
        split_ratio: Fraction of files sent to ``train_dest``; the count is
            ``int(len(files) * split_ratio)``.

    Files keep their basename. When a file with that name already exists in
    the destination, a numeric suffix (``name_1.ext``, ``name_2.ext``, ...)
    is appended instead of overwriting it.
    """
    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(files)
    random.shuffle(shuffled)
    split_idx = int(len(shuffled) * split_ratio)

    for dest in (train_dest, test_dest):
        # An empty string means "current directory"; nothing to create then.
        if dest:
            os.makedirs(dest, exist_ok=True)

    for i, src_path in enumerate(shuffled):
        dest_dir = train_dest if i < split_idx else test_dest
        # Inputs gathered from several folders may share a basename; pick a
        # free name so no file is silently replaced.
        dest_path = _unique_destination(dest_dir, os.path.basename(src_path))
        shutil.move(src_path, dest_path)

In [24]:
# Root of the resized dataset and the two folders written by the resize cells.
base_dir = "ps-battles-proc"
originals_dir = os.path.join(base_dir, "originals")
photoshops_dir = os.path.join(base_dir, "photoshops")

# Target layout: <base_dir>/<split>/<label>, with labels "real" and "fake".
train_real, test_real = (
    os.path.join(base_dir, split, "real") for split in ("train", "test")
)
train_photoshopped, test_photoshopped = (
    os.path.join(base_dir, split, "fake") for split in ("train", "test")
)

for split_dir in (train_real, test_real, train_photoshopped, test_photoshopped):
    os.makedirs(split_dir, exist_ok=True)

# Originals live directly inside originals_dir.
if os.path.exists(originals_dir):
    original_files = []
    for entry in os.listdir(originals_dir):
        candidate = os.path.join(originals_dir, entry)
        if os.path.isfile(candidate):
            original_files.append(candidate)
    split_and_move(original_files, train_real, test_real)

# Photoshops are nested one level deeper; flatten them into a single list.
photoshopped_files = []
if os.path.exists(photoshops_dir):
    for subfolder in os.listdir(photoshops_dir):
        subfolder_dir = os.path.join(photoshops_dir, subfolder)
        if not os.path.isdir(subfolder_dir):
            continue
        photoshopped_files.extend(
            nested
            for nested in (
                os.path.join(subfolder_dir, name) for name in os.listdir(subfolder_dir)
            )
            if os.path.isfile(nested)
        )

if photoshopped_files:
    split_and_move(photoshopped_files, train_photoshopped, test_photoshopped)

# Remove the source trees once their files have been moved out.
for leftover in (originals_dir, photoshops_dir):
    if os.path.exists(leftover):
        shutil.rmtree(leftover)

print("✅ Dataset reorganized successfully!")

✅ Dataset reorganized successfully!


#### Create a validation set folder for ps-battles and cifake

In [25]:
def make_validation_set(base_dir, validation_dir, split_ratio=0.2):
    """Move a random share of each subfolder's files into a validation tree.

    Args:
        base_dir: Directory whose immediate subfolders are processed.
            Non-directory entries are ignored.
        validation_dir: Root of the validation tree; created if missing.
            Files land in ``validation_dir/<subfolder>/``.
        split_ratio: Fraction of each subfolder's files to move; the count is
            ``int(len(files) * split_ratio)``.
    """
    os.makedirs(validation_dir, exist_ok=True)

    for class_name in os.listdir(base_dir):
        class_dir = os.path.join(base_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        candidates = []
        for entry in os.listdir(class_dir):
            if os.path.isfile(os.path.join(class_dir, entry)):
                candidates.append(entry)
        random.shuffle(candidates)

        n_val = int(len(candidates) * split_ratio)
        target_dir = os.path.join(validation_dir, class_name)
        for picked in candidates[:n_val]:
            # Created lazily, so a subfolder with nothing to move leaves no
            # empty directory behind in the validation tree.
            os.makedirs(target_dir, exist_ok=True)
            shutil.move(
                os.path.join(class_dir, picked),
                os.path.join(target_dir, picked),
            )

In [26]:
# Carve a validation split out of the ps-battles training folder.
base_dir = "ps-battles-proc"
train_dir, validation_dir = (
    os.path.join(base_dir, split) for split in ("train", "val")
)

make_validation_set(base_dir=train_dir, validation_dir=validation_dir)

print("✅ Validation set created successfully!")

✅ Validation set created successfully!


In [27]:
# Carve a validation split out of the cifake training folder.
base_dir = "cifake"
train_dir, validation_dir = (
    os.path.join(base_dir, split) for split in ("train", "val")
)

make_validation_set(base_dir=train_dir, validation_dir=validation_dir)

print("✅ Validation set created successfully!")

✅ Validation set created successfully!


#### Subsample datasets

In [4]:
def subsample_dataset(base_dir, output_dir, percentage=0.2):
    """Copy a random fraction of every ``<split>/<class>`` folder to ``output_dir``.

    Args:
        base_dir: Dataset root laid out as ``base_dir/<split>/<class>/<file>``.
            Entries that are not directories, at either level, are skipped.
        output_dir: Root of the subsampled copy; created if missing. The
            ``<split>/<class>`` structure is mirrored underneath it.
        percentage: Fraction of each class folder to copy; the count is
            ``int(len(files) * percentage)``.

    Source files are copied, not moved, so ``base_dir`` is left intact.
    """
    os.makedirs(output_dir, exist_ok=True)

    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        # Stray files next to the split folders (e.g. .DS_Store, a README)
        # would make os.listdir() raise NotADirectoryError.
        if not os.path.isdir(folder_path):
            continue

        for class_folder in os.listdir(folder_path):
            class_path = os.path.join(folder_path, class_folder)
            if not os.path.isdir(class_path):
                continue

            files = [
                f for f in os.listdir(class_path)
                if os.path.isfile(os.path.join(class_path, f))
            ]
            random.shuffle(files)
            selected = files[:int(len(files) * percentage)]
            if not selected:
                # Nothing to copy: do not leave an empty class folder behind.
                continue

            # Create the destination once per class rather than once per file.
            dest_dir = os.path.join(output_dir, folder, class_folder)
            os.makedirs(dest_dir, exist_ok=True)
            for file in selected:
                shutil.copy(os.path.join(class_path, file), os.path.join(dest_dir, file))

In [6]:
# Copy a subsample of the ps-battles dataset into a sibling "-sub" folder.
base_dir, output_dir = "ps-battles-proc", "ps-battles-proc-sub"

subsample_dataset(base_dir=base_dir, output_dir=output_dir)

print("✅ Dataset subsampled successfully!")

✅ Dataset subsampled successfully!


In [7]:
# Copy a subsample of the cifake dataset into a sibling "-sub" folder.
base_dir, output_dir = "cifake", "cifake-sub"

subsample_dataset(base_dir=base_dir, output_dir=output_dir)

print("✅ Dataset subsampled successfully!")

✅ Dataset subsampled successfully!
