In [None]:
import os
import shutil
import numpy as np

In [None]:
# Fractions of the dataset assigned to the train / validation / test subsets.
train_split, val_split, test_split = 0.7, 0.15, 0.15

In [None]:
# Input dataset directory, followed by one output directory per subset.
source_folder = 'ChiSig'
train_folder, val_folder, test_folder = 'train', 'validation', 'test'

In [None]:
# Ensure each subset directory exists, along with its two class subfolders.
for folder in (train_folder, val_folder, test_folder):
    os.makedirs(folder, exist_ok=True)
    for label in ('positive', 'negative'):
        os.makedirs(os.path.join(folder, label), exist_ok=True)

In [None]:
# List every entry in the dataset folder.
# os.listdir() returns names in arbitrary, platform-dependent order, so the
# listing is sorted: the seeded shuffle applied later can only reproduce the
# same split on every run and machine if it starts from the same ordering.
all_files = sorted(os.listdir(source_folder))

In [None]:
# Print the full directory listing (a quick visual check of the file names).
print(all_files)

In [None]:
# ID sets filled in by the file-name scan: IDs treated as genuine signatures and
# IDs treated as professional forgeries.
originals, professional_forgeries = set(), set()

In [None]:
# Scan the file names and record which IDs are professional forgeries and which
# IDs are the originals they correspond to. The ID is the second '-'-separated
# field of the name; names that do not fit that shape are ignored.
for file in all_files:
    parts = file.split('-')
    if len(parts) < 3:
        continue

    try:
        number_a = int(parts[1])
    except ValueError:
        # A stray file whose middle field is not an integer is skipped like any
        # other malformed name instead of aborting the whole scan.
        continue

    # IDs above 100 are treated as professional forgeries; the matching
    # original is the ID 100 lower.
    if number_a > 100:
        professional_forgeries.add(number_a)
        originals.add(number_a - 100)

In [None]:
# Function to determine label
def get_label(filename):
    """Return the class label for a signature file name.

    The ID is read from the second '-'-separated field of ``filename``.

    Args:
        filename: Base name of a file from the dataset folder.

    Returns:
        'positive' if the ID is in the module-level ``originals`` set,
        'negative' for every other ID (professional and random forgeries
        alike), or None when the name carries no integer ID in its second
        field (e.g. hidden files or notebook checkpoint folders), so callers
        can skip the entry.
    """
    parts = filename.split('-')
    try:
        number_a = int(parts[1])
    except (IndexError, ValueError):
        # Not a dataset file name: no second field, or it is not numeric.
        return None

    # Original signature
    if number_a in originals:
        return 'positive'
    # Professional forgeries and random forgeries share the same label.
    return 'negative'

In [None]:
# Shuffle files
# np.random.shuffle reorders the all_files list in place. Seeding NumPy's
# global RNG first makes the permutation repeatable, provided all_files starts
# in the same order on each run.
seed = 42
np.random.seed(seed)
np.random.shuffle(all_files)

In [None]:
# Work out how many files go to the train and validation subsets. int()
# truncates, so any leftover files end up in whatever is sliced off after these
# two counts.
total_files = len(all_files)
train_count, val_count = (
    int(total_files * fraction) for fraction in (train_split, val_split)
)

In [None]:
# Cut the shuffled list into three consecutive, non-overlapping slices:
# the first train_count entries, the next val_count entries, and the remainder.
train_files, val_files, test_files = (
    all_files[:train_count],
    all_files[train_count:train_count + val_count],
    all_files[train_count + val_count:],
)

In [None]:
def copy_files(files, target_folder):
    """Copy each labelled file from ``source_folder`` into its class subfolder.

    Args:
        files: Iterable of file names located in ``source_folder``.
        target_folder: Subset directory; each file is copied into
            ``target_folder/<label>`` where the label comes from ``get_label``.

    Files for which ``get_label`` returns a falsy value are skipped.
    """
    for name in files:
        label = get_label(name)
        if not label:
            continue
        origin = os.path.join(source_folder, name)
        destination = os.path.join(target_folder, label)
        shutil.copy(origin, destination)

In [None]:
# Copy every subset into its own output directory, in train/validation/test order.
for subset, destination in (
    (train_files, train_folder),
    (val_files, val_folder),
    (test_files, test_folder),
):
    copy_files(subset, destination)

print("Data splitting completed.")