In [23]:
# work in progress

In [24]:
# NOTE
# 
# could it be a confound if smaller pain reductions are not detected?
# or if big pain reductions are quite rare (3 per one trial over 4 min, as of now)
# -> should we only do "big" pain reductions?

## Estimation of data set size

In [25]:
import numpy as np

In [26]:
print("Note: Samples are defined as sections with big temperature reductions.")

def _holdout_sizes(size, val, test):
    """
    Returns the number of subjects/samples for training, validation, and test sets.
    
    Parameters:
    - size: Total number of subjects/samples
    - val: Proportion of validation set
    - test: Proportion of test set
    
    Returns:
    - List of sizes [train, val, test]
    """
    train = 1 - val - test
    return [int(size * train), int(size * val), int(size * test)]

def _print_info(n_subjects_aim, n_samples_per_subject, n_subjects_holdout):
    """
    Prints the details of the dataset.
    
    Parameters:
    - n_subjects_aim: Number of subjects aimed for
    - n_samples_per_subject: Number of samples per subject
    - n_subjects_holdout: List of sizes [train, val, test] for subjects
    """
    print(f"Number of subjects: {n_subjects_aim}")
    print(f"Number of samples: {n_subjects_aim * n_samples_per_subject}")
    print(f"Number of samples per subject: {n_samples_per_subject}")
    print(f"\nNumber of subjects in holdout set: {n_subjects_holdout}")
    print(f"Number of samples in train set: {n_subjects_holdout[0] * n_samples_per_subject}")

def estimate_dataset_size(n_subjects_aim=None, n_samples_aim=None):
    """
    Estimates the size of the dataset.
    
    Parameters:
    - n_subjects_aim: Number of subjects aimed for
    - n_samples_aim: Number of samples aimed for
    """
    n_samples_per_trial = 3
    n_trials = 4
    n_skin_areas = 3
    n_samples_per_subject = n_trials * n_skin_areas * n_samples_per_trial
    time_per_trial_s = 250
    time_per_session_min = n_skin_areas * n_trials * time_per_trial_s // 60
    
    if n_subjects_aim is None and n_samples_aim is None:
        raise ValueError("Either n_subjects_aim or n_samples_aim must be provided.")
    
    if n_subjects_aim:
        print("Estimate dataset size via n_subjects_aim:\n")
        n_samples_aim = n_subjects_aim * n_samples_per_subject
    else:
        print("Estimate dataset size via n_samples_aim:\n")
        n_subjects_aim = int(np.ceil(n_samples_aim / n_samples_per_subject))
        
    print(f"Stimulation time per session: {time_per_session_min} min")
    print(f"Stimulation time per trial: {time_per_trial_s} s\n")
    print(f"Number of samples per subject: {n_samples_per_subject}")

    n_subjects_holdout = _holdout_sizes(n_subjects_aim, 0.2, 0.2)
    _print_info(n_subjects_aim, n_samples_per_subject, n_subjects_holdout)
    
    info_dict = {
        'time_per_session_min': time_per_session_min,
        'time_per_trial_s': time_per_trial_s,
        'n_samples_per_subject': n_samples_per_subject,
        'n_subjects_aim': n_subjects_aim,
        'n_samples_aim': n_samples_aim,
        'n_subjects_holdout': n_subjects_holdout,
        'n_samples_in_train_set': n_subjects_holdout[0] * n_samples_per_subject
    }
    
    return info_dict

Note: Samples are defined as sections with big temperature reductions.


In [27]:
# via n_subjects_aim
n_subjects_aim = 50
info = estimate_dataset_size(n_subjects_aim = n_subjects_aim)

Estimate dataset size via n_subjects_aim:

Stimulation time per session: 50 min
Stimulation time per trial: 250 s

Number of samples per subject: 36
Number of subjects: 50
Number of samples: 1800
Number of samples per subject: 36

Number of subjects in holdout set: [30, 10, 10]
Number of samples in train set: 1080


In [28]:
# via n_samples_aim
n_samples_aim = 1800
info = estimate_dataset_size(n_samples_aim = n_samples_aim)

Estimate dataset size via n_samples_aim:

Stimulation time per session: 50 min
Stimulation time per trial: 250 s

Number of samples per subject: 36
Number of subjects: 50
Number of samples: 1800
Number of samples per subject: 36

Number of subjects in holdout set: [30, 10, 10]
Number of samples in train set: 1080


## Estimation of work hours

In [29]:
setting_everything_up_min = 60

time_per_subject_min = info['time_per_session_min'] + setting_everything_up_min
time_per_experiment_min = time_per_subject_min * info['n_subjects_aim']
time_per_experiment_h = time_per_experiment_min // 60
n_experiments = 2
time_experiments_h = time_per_experiment_h * n_experiments
print(f"Total work hours for both experiments: {time_experiments_h} h")

Total work hours for both experiments: 182 h
