### Load packages

In [8]:
import os
import shutil
import xml.etree.ElementTree as ET

from sklearn.model_selection import train_test_split

### Setup

In [22]:
# Dataset root, resolved against the current working directory.
DATA_PATH = os.path.abspath(
    os.path.join("../../../..", "Seafile", "Detecting-Objects", "DataCollection")
)

# Source folders holding the collected images and their labels.
IMAGE_PATH, LABEL_PATH = (
    os.path.join(DATA_PATH, sub_dir) for sub_dir in ("images", "labels")
)

# Destination folders, one per partition, all below "<DATA_PATH>/splits".
TRAIN_PATH, VAL_PATH, TEST_PATH = (
    os.path.join(DATA_PATH, "splits", split_name)
    for split_name in ("train", "val", "test")
)

# Fraction of each class assigned to the train / validation / test partition.
TRAIN_SPLIT, VAL_SPLIT, TEST_SPLIT = 0.7, 0.2, 0.1

### Split data

In [21]:
# Create train, test, val partition from our 3 classes (rock, paper, scissors)
def create_partition(image_path):
    """Split every class folder under ``image_path`` into train/val/test sets.

    Each visible sub-directory of ``image_path`` is read as one class. Its
    ``rgb`` folder supplies the image files and its ``pascal_voc`` folder the
    matching ``<stem>.xml`` annotation for every image. Images are split
    according to TRAIN_SPLIT / VAL_SPLIT / TEST_SPLIT, and each image is copied
    together with its annotation into TRAIN_PATH, VAL_PATH or TEST_PATH.

    Args:
        image_path: Directory containing one sub-directory per class.

    Raises:
        FileNotFoundError: If a class folder has no ``rgb`` directory or an
            image has no matching annotation file.
        ValueError: Propagated from ``train_test_split`` when a class holds
            too few images to fill every partition.
    """
    # Create the destination folders once; makedirs also creates the missing
    # "splits" parent, which a plain os.mkdir would fail on.
    for split_dir in (TRAIN_PATH, VAL_PATH, TEST_PATH):
        os.makedirs(split_dir, exist_ok=True)

    # Sorted so the fixed random_state yields the same split on every machine
    # (os.listdir order is arbitrary).
    for folder in sorted(os.listdir(image_path)):
        if folder.startswith(('.', '_')):
            continue

        class_dir = os.path.join(image_path, folder)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are not classes.
            continue

        rgb_dir = os.path.join(class_dir, 'rgb')
        voc_dir = os.path.join(class_dir, 'pascal_voc')

        images = sorted(
            name for name in os.listdir(rgb_dir) if not name.startswith('.')
        )
        if not images:
            # Nothing to split; train_test_split would raise on an empty list.
            continue

        # Split into train test validation set. From: https://datascience.stackexchange.com/a/53161
        # Only the image names are split; the annotation is derived from each
        # image's stem below, so image/label pairs cannot drift apart.
        train_files, holdout_files = train_test_split(
            images, test_size=1 - TRAIN_SPLIT, random_state=1337
        )
        val_files, test_files = train_test_split(
            holdout_files,
            test_size=TEST_SPLIT / (TEST_SPLIT + VAL_SPLIT),
            random_state=1337,
        )

        # Copy files and labels to train, test, val folders
        # NOTE(review): all classes share the same destination folders, so
        # identical file names in different class folders would overwrite each
        # other — confirm names are unique across classes.
        partitions = (
            (TRAIN_PATH, train_files),
            (TEST_PATH, test_files),
            (VAL_PATH, val_files),
        )
        for destination, file_names in partitions:
            for file_name in file_names:
                # splitext keeps dots inside the stem ("a.b.png" -> "a.b").
                stem = os.path.splitext(file_name)[0]
                shutil.copy(os.path.join(rgb_dir, file_name), destination)
                shutil.copy(os.path.join(voc_dir, f"{stem}.xml"), destination)
            
# Run the split for the class folders found under IMAGE_PATH; files are copied
# into TRAIN_PATH, VAL_PATH and TEST_PATH.
create_partition(IMAGE_PATH)