In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pyrootutils.root

In [3]:
from sklearn.model_selection import KFold
import numpy as np
import yaml
import os

In [8]:
# Configuration for the plain training/validation/testing split.
# NOTE(review): the backtick inside the dataset name looks like a typo — confirm it
# against the file name under config/dataset/ before changing it.
dataset_name = '01`3_RX100+aligned_iPhone'
# Number of folds; 1 selects a single hold-out split, any other value is handed to KFold.
n_splits = 1
# Seed for the cross-validation split.
seed_cross_validation = 42
# Seed for the training/validation permutation inside each fold.
seed_permutation = 48
# ratios for training, validation and testing
ratios = [0.7, 0.3, 0.0]

# exist_ok=True keeps this cell re-runnable: without it the second execution
# raises FileExistsError because the directories are already there.
os.makedirs(f'config/training_files/{dataset_name}', exist_ok=True)
os.makedirs(f'config/validation_files/{dataset_name}', exist_ok=True)
os.makedirs(f'config/testing_files/{dataset_name}', exist_ok=True)

In [9]:
# Split the entries of config/dataset/<dataset_name>.yaml into training, validation
# and testing lists and write one YAML file per subset and fold.
dataset = f'config/dataset/{dataset_name}.yaml'

with open(dataset, 'r') as stream:
    files = yaml.safe_load(stream)

indices = np.arange(len(files))

if n_splits == 1:
    # Single hold-out split built by hand. The shuffle uses its own generator seeded
    # with seed_cross_validation so the split is identical after a kernel restart;
    # the global NumPy generator is only seeded further below.
    np.random.RandomState(seed_cross_validation).shuffle(indices)
    trn_len = int(len(indices) * (ratios[0] + ratios[1]))
    folds = [[indices[:trn_len], indices[trn_len:]]]
else:
    folds = KFold(n_splits=n_splits, random_state=seed_cross_validation, shuffle=True).split(indices)

# Nominal subset sizes implied by `ratios`. In the KFold branch the test part of each
# fold comes from KFold rather than ratios[2], so the per-fold sizes printed inside
# the loop may differ from this line.
print(int(len(indices) * ratios[0]), int(len(indices) * ratios[1]), int(len(indices) * ratios[2]))
print('-' * 10)

# Seed the global generator used by np.random.permutation below.
np.random.seed(seed_permutation)

for i, (trn_indices, tst_indices) in enumerate(folds):
    # Shuffle the non-test part, then cut it into training and validation according
    # to the relative weight of ratios[0] and ratios[1].
    trn_indices = np.random.permutation(trn_indices)

    p = int(len(trn_indices) * (ratios[0] / (ratios[0] + ratios[1])))
    trn_indices, val_indices = trn_indices[:p], trn_indices[p:]

    # Sorted indices keep the written lists in the order of the dataset file.
    trn_indices = sorted(trn_indices)
    val_indices = sorted(val_indices)
    tst_indices = sorted(tst_indices)

    print(len(trn_indices), len(val_indices), len(tst_indices))

    # `idx` (not `i`) is used in the comprehensions so the fold number that names
    # the output file is never visually confused with a file index.
    with open(f'config/training_files/{dataset_name}/{i}.yaml', 'w') as yaml_file:
        trn_files = [files[idx] for idx in trn_indices]
        yaml.dump(trn_files, yaml_file, default_flow_style=False)

    with open(f'config/validation_files/{dataset_name}/{i}.yaml', 'w') as yaml_file:
        val_files = [files[idx] for idx in val_indices]
        yaml.dump(val_files, yaml_file, default_flow_style=False)

    with open(f'config/testing_files/{dataset_name}/{i}.yaml', 'w') as yaml_file:
        tst_files = [files[idx] for idx in tst_indices]
        yaml.dump(tst_files, yaml_file, default_flow_style=False)

48 20 0
----------
48 21 0


Create training and validation sets for fine-tuning the last layer of the structured predictor

In [4]:
# Configuration for the structured-predictor fine-tuning split.
dataset_name = '013_Mobius_structured'
n_splits = 2
# Optional explicit name for the output files. None names each file after its fold
# index; any other value is used for every fold, so later folds overwrite earlier ones.
split = None
seed = 42
# ratios for training, validation and testing
ratios = [0.2, 0.2, 0.6]

# The split cell writes into these directories, so they must exist beforehand.
# exist_ok=True makes the calls safe to repeat when the cell is re-run.
os.makedirs(f'config/structured_predictor/training_files/{dataset_name}', exist_ok=True)
os.makedirs(f'config/structured_predictor/validation_files/{dataset_name}', exist_ok=True)
os.makedirs(f'config/structured_predictor/testing_files/{dataset_name}', exist_ok=True)

In [5]:
# Partition the entries of config/dataset/<dataset_name>.yaml into training,
# validation and testing lists and write one YAML file per subset and fold
# below config/structured_predictor/.
dataset = f'config/dataset/{dataset_name}.yaml'
with open(dataset, 'r') as stream:
    files = yaml.safe_load(stream)

indices = np.arange(len(files))
n_files = len(indices)

# Seed the global NumPy generator before any shuffle or permutation below.
np.random.seed(seed)

if n_splits != 1:
    folds = KFold(n_splits=n_splits, random_state=seed, shuffle=True).split(indices)
else:
    # Single hold-out split: the leading (training + validation) share of the
    # shuffled indices is kept, the remainder becomes the test part.
    np.random.shuffle(indices)
    trn_len = int(n_files * (ratios[0] + ratios[1]))
    folds = [[indices[:trn_len], indices[trn_len:]]]

# Nominal subset sizes implied by `ratios`, followed by a separator line.
print(*[int(n_files * ratios[k]) for k in range(3)])
print('-' * 10)

for i, (trn_val_part, tst_part) in enumerate(folds):
    # Permute the non-test part and cut it by the relative weight of
    # ratios[0] versus ratios[1].
    shuffled = np.random.permutation(trn_val_part)
    cut = int(len(shuffled) * (ratios[0] / (ratios[0] + ratios[1])))

    trn_indices = sorted(shuffled[:cut])
    val_indices = sorted(shuffled[cut:])
    tst_indices = sorted(tst_part)

    print(len(trn_indices), len(val_indices), len(tst_indices))

    # An explicit `split` replaces the fold index in the output file names.
    i = i if split is None else split

    trn_files = [files[idx] for idx in trn_indices]
    val_files = [files[idx] for idx in val_indices]
    tst_files = [files[idx] for idx in tst_indices]

    outputs = (
        (f'config/structured_predictor/training_files/{dataset_name}/{i}.yaml', trn_files),
        (f'config/structured_predictor/validation_files/{dataset_name}/{i}.yaml', val_files),
        (f'config/structured_predictor/testing_files/{dataset_name}/{i}.yaml', tst_files),
    )
    for out_path, out_files in outputs:
        with open(out_path, 'w') as yaml_file:
            yaml.dump(out_files, yaml_file, default_flow_style=False)

7 7 21
----------
8 9 18
9 9 17


Create training and validation sets for structured prediction (pool the training and validation data of each fold, reshuffle, and split again)

In [6]:
# Dataset whose per-fold file lists under config/{training,validation,testing}_files/
# are read and re-split by the cell below.
dataset_name = '004_eyedea_all_aligned_RX100'

In [4]:
from src import *

In [22]:
# Share of the pooled training + validation files that goes back into training.
trn_to_val_ratio = 0.6
# Number of folds to re-split; the fold files 0 .. n_folds - 1 are read below.
n_folds = 5
# Fixed seed so the re-split is reproducible regardless of which cells ran before.
reshuffle_seed = 42

os.makedirs(f'config/structured_predictor/training_files/{dataset_name}/', exist_ok=True)
os.makedirs(f'config/structured_predictor/validation_files/{dataset_name}/', exist_ok=True)
os.makedirs(f'config/structured_predictor/testing_files/{dataset_name}/', exist_ok=True)


def make_yaml_list(x):
    """Format every item of `x` as a YAML block-sequence line ('- item').

    No longer used by this cell; kept so existing references to the name keep working.
    """
    return [f'- {f}' for f in x]


def _dump_file_list(path, file_list):
    """Write `file_list` to `path` as a YAML sequence.

    yaml.dump quotes entries where needed and writes an explicit empty sequence for
    an empty list, whereas hand-formatted '- item' lines produce an empty file
    (which parses as None) and no quoting.
    """
    with open(path, 'w') as yaml_file:
        yaml.dump(list(file_list), yaml_file, default_flow_style=False)


# Dedicated generator: the shuffle does not depend on, or disturb, the global NumPy state.
rng = np.random.RandomState(reshuffle_seed)

for i in range(n_folds):
    print(i)
    # `or []` tolerates an empty list file.
    # NOTE(review): presumably load_yaml returns None for an empty YAML file — confirm.
    training_files = load_yaml(f'config/training_files/{dataset_name}/{i}.yaml') or []
    validation_files = load_yaml(f'config/validation_files/{dataset_name}/{i}.yaml') or []
    testing_files = load_yaml(f'config/testing_files/{dataset_name}/{i}.yaml') or []

    # Pool training and validation, reshuffle, and cut again at trn_to_val_ratio.
    all_files = training_files + validation_files
    rng.shuffle(all_files)
    p = int(len(all_files) * trn_to_val_ratio)
    training_files = sorted(all_files[:p])
    validation_files = sorted(all_files[p:])

    # The testing list is copied through unchanged.
    _dump_file_list(f'config/structured_predictor/training_files/{dataset_name}/{i}.yaml', training_files)
    _dump_file_list(f'config/structured_predictor/validation_files/{dataset_name}/{i}.yaml', validation_files)
    _dump_file_list(f'config/structured_predictor/testing_files/{dataset_name}/{i}.yaml', testing_files)
