# Libraries

In [6]:
import numpy as np
import sklearn.datasets as datasets
import time
import copy
import scipy
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_friedman3
from sklearn.model_selection import GroupKFold, GroupShuffleSplit
from tqdm.notebook import tqdm, tnrange

In [5]:
from DME.Simulation import train

# Helper functions

In [None]:
def linear_func(n_samples, random_state=1):
    '''
    Draw uniform features and a noise-free linear target.

    Seeds NumPy's global random generator with ``random_state``, samples an
    ``(n_samples, 4)`` matrix of U(0, 1) features and returns it together
    with the target ``1 + (sum of the four features)``.

    Returns:
        tuple: ``(X, F)`` with ``X`` of shape ``(n_samples, 4)`` and ``F`` of
        shape ``(n_samples,)``.
    '''
    np.random.seed(random_state)  # global seed: also affects later np.random calls
    features = np.random.uniform(size=(n_samples, 4))
    target = features.sum(axis=1) + 1

    return features, target

In [None]:
def exponential_kernel(x1, x2, sigma2, rho):
    '''
    Kernel matrix between two 1-D vectors of inputs.

    Entry ``(i, j)`` is ``sigma2 * exp(-(x1[i] - x2[j])**2 / rho)``. The
    distance is the *squared* Euclidean distance, i.e. this is the
    squared-exponential form of the kernel.

    Returns:
        ndarray of shape ``(len(x1), len(x2))``.
    '''
    # cdist expects 2-D inputs, so turn each vector into a single-column matrix
    col1 = np.asarray(x1)[:, np.newaxis]
    col2 = np.asarray(x2)[:, np.newaxis]
    sq_dist = scipy.spatial.distance.cdist(col1, col2, 'sqeuclidean')

    return sigma2 * np.exp(-sq_dist / rho)

In [None]:
def generate_data(n, m, p, func, shared_gp=True, random_state=1):
    '''
    Simulate one grouped dataset with a temporal Gaussian-process effect.

    The outcome is ``y = F + G + epsilon`` where ``F`` is the fixed-effect
    function of four features, ``G`` is a per-group random intercept plus a
    per-group GP evaluated at that group's observation times, and ``epsilon``
    is Gaussian noise with variance ``0.5 ** 2``.

    Args:
        n: Total number of observations; must equal ``m * p``.
        m: Number of groups.
        p: Number of observations per group.
        func: ``'make_friedman3'`` (target scaled by ``sqrt(10)``) or
            ``'linear_func'``.
        shared_gp: If True every group uses the same kernel parameters
            (``sigma2 = 1``, ``rho = 0.1``); otherwise each group draws its
            own ``sigma2`` (inverse-gamma) and ``rho`` (uniform).
        random_state: Seed used for the features, the observation times and
            NumPy's global generator.

    Returns:
        pandas.DataFrame with columns ``F``, ``group``, ``times``,
        ``feature_1`` .. ``feature_4`` and ``y`` (one row per observation,
        rows ordered by group).

    Raises:
        ValueError: If ``n != m * p`` or ``func`` is not a supported name.
    '''
    # Fail fast: with n != m * p the arrays built below have different lengths
    # and the function would otherwise die later with a broadcasting error.
    if m * p != n:
        raise ValueError(f'n must equal m * p (got n={n}, m={m}, p={p})')

    # Features
    if func == 'make_friedman3':
        X, F = make_friedman3(n_samples=n, random_state=random_state)
        F *= 10**0.5
    elif func == 'linear_func':
        X, F = linear_func(n_samples=n, random_state=random_state)
    else:
        raise ValueError('function is invalid')

    # Group IDs: observations i*p .. (i+1)*p - 1 belong to group i
    group = np.repeat(np.arange(m), p)

    # Incidence matrix relating grouped random effects to observations
    Z1 = np.zeros((n, m))
    Z1[np.arange(n), group] = 1

    # Simulate random (sorted) observation times for each group:
    # p distinct integers out of range(1000)
    rng = np.random.default_rng(seed=random_state)
    times_arrays = [np.sort(rng.choice(1000, size=p, replace=False, shuffle=False)) for _ in range(m)]
    times = np.concatenate(times_arrays)

    # Simulate one GP draw per group (legacy global RNG, re-seeded here)
    GP_list = []
    np.random.seed(random_state)
    for arr in times_arrays:
        if shared_gp:
            sigma2_2 = 1 ** 2  # Marginal variance of GP
            rho = 0.1  # GP range parameter
        else:
            # Group-specific parameters; the draw order (sigma2 first, then rho)
            # matters for reproducibility of the random stream.
            sigma2_2 = scipy.stats.invgamma.rvs(1, loc=0, scale=10)
            rho = np.random.uniform(0.01, 1000)
        K = exponential_kernel(arr, arr, sigma2_2, rho)
        GP_list.append(np.random.multivariate_normal(mean=np.zeros(p), cov=K))

    # Simulate outcome variable
    np.random.seed(random_state)
    sigma2 = 0.5 ** 2  # Error variance
    b = np.random.normal(size=m)  # Random intercept per group
    G = np.dot(Z1, b) + np.concatenate(GP_list)  # Random intercept + GP
    epsilon = np.sqrt(sigma2) * np.random.normal(size=n)  # Error term
    y = F + G + epsilon

    # Assemble the dataframe (column order is part of the contract)
    data = pd.DataFrame({
        'F': F,
        'group': group,
        'times': times,
        'feature_1': X[:, 0],
        'feature_2': X[:, 1],
        'feature_3': X[:, 2],
        'feature_4': X[:, 3],
        'y': y,
    })

    return data

In [None]:
def generate_datasets(n, m, p, n_datasets, n_valid, func, shared_gp=True, random_state=1):
    '''
    Build the collections of simulated datasets used for testing and validation.

    For each of the ``n_datasets`` test datasets the data is simulated with seed
    ``random_state + i``, 20% of the groups are held out (extrapolation split,
    seed ``i``) and per-group hold-out counts are computed on the remaining
    groups (interpolation split, seed ``i``). The ``n_valid`` validation
    datasets use seeds ``random_state + n_datasets + i`` and only receive the
    extrapolation split.

    Returns:
        tuple: ``(datasets, validation_datasets)`` where ``datasets`` has keys
        ``'data'``, ``'dataframes'`` (``[df_new, df_test]`` pairs) and
        ``'n_samples_chosen_per_group'``, and ``validation_datasets`` has the
        single key ``'dataframes'`` (``[df_train, df_val]`` pairs).
    '''
    raw_frames, split_frames, chosen_counts = [], [], []

    # Training and test sets
    for idx in range(n_datasets):
        raw = generate_data(n, m, p, func, shared_gp, random_state=random_state + idx)
        seen, unseen = train_test_split_grouped_extrapolation(
            raw, raw['group'], test_size=0.2, random_state=idx)
        sizes = seen.groupby(['group']).size().to_numpy()
        counts = train_test_split_grouped_interpolation(
            seen, sizes, test_size=0.2, random_state=idx)
        raw_frames.append(raw)
        split_frames.append([seen, unseen])
        chosen_counts.append(counts)

    test_bundle = {
        'data': raw_frames,
        'dataframes': split_frames,
        'n_samples_chosen_per_group': chosen_counts,
    }

    # Validation sets (seeds continue after those of the test datasets)
    valid_frames = []
    for idx in range(n_valid):
        raw = generate_data(n, m, p, func, shared_gp,
                            random_state=random_state + n_datasets + idx)
        fit_part, val_part = train_test_split_grouped_extrapolation(
            raw, raw['group'], test_size=0.2, random_state=idx)
        valid_frames.append([fit_part, val_part])

    return test_bundle, {'dataframes': valid_frames}

In [None]:
def param_search(dataframes, model_config, train_config,
                 random_effects_column_names,
                 group_column_name, y_column_name,
                 n_samples_chosen_per_group_list,
                 model_type='MLP',
                 random_state=1):
    '''
    Average validation error of one configuration over several datasets.

    Calls ``train.train_and_evaluate_model`` once per entry of ``dataframes``
    (paired by position with ``n_samples_chosen_per_group_list``) and returns
    the mean of the second value it returns for each dataset.
    '''
    val_errors = []
    for idx in tnrange(len(dataframes)):
        # The first returned value (best model state) is not needed here
        _, val_err = train.train_and_evaluate_model(
            model_config, train_config, dataframes[idx],
            random_effects_column_names,
            group_column_name, y_column_name,
            n_samples_chosen_per_group_list[idx],
            model_type, random_state=random_state)
        val_errors.append(val_err)

    return np.mean(val_errors)

In [None]:
def test(dataframes, model_config, train_config,
         random_effects_column_names,
         group_column_name, y_column_name,
         n_samples_chosen_per_group_list,
         model_type='MLP',
         random_state=1):
    '''
    Train and test one configuration on every dataset and summarise the RMSEs.

    ``train.train_and_test_model`` is called once per entry of ``dataframes``.
    Its third and fourth return values are reported as the extrapolation and
    interpolation RMSE respectively; its first two return values (the F-RMSE on
    both test sets) are pooled into one list, which therefore holds two
    entries per dataset.

    The summary is printed and also returned as
    ``(extrapolation mean, extrapolation std, interpolation mean,
    interpolation std, F mean, F std)``.
    '''
    f_scores, extrap_scores, interp_scores = [], [], []
    for idx in tnrange(len(dataframes)):
        f_extrap, f_interp, extrap, interp = train.train_and_test_model(
            model_config, train_config, dataframes[idx], random_effects_column_names,
            group_column_name, y_column_name,
            n_samples_chosen_per_group_list[idx],
            model_type, random_state=random_state)
        f_scores.extend([f_extrap, f_interp])
        extrap_scores.append(extrap)
        interp_scores.append(interp)

    extrap_mean, extrap_std = np.mean(extrap_scores), np.std(extrap_scores)
    interp_mean, interp_std = np.mean(interp_scores), np.std(interp_scores)
    f_mean, f_std = np.mean(f_scores), np.std(f_scores)

    print('Extrapolation')
    print('Mean: ', extrap_mean)
    print('Std: ', extrap_std)
    print('\n')
    print('Interpolation')
    print('Mean: ', interp_mean)
    print('Std: ', interp_std)
    print('\n')
    print('F')
    print('Mean: ', f_mean)
    print('Std: ', f_std)

    return extrap_mean, extrap_std, interp_mean, interp_std, f_mean, f_std

In [None]:
def train_test_split_grouped_interpolation(df, group_sizes, test_size=0.2, random_state=1):
    '''
    Decide how many observations of each group go into an interpolation test set.

    Every group contributes at least one observation. Further observations are
    then taken from randomly chosen groups until ``int(test_size * len(df))``
    have been selected, always leaving at least one observation of a group
    behind once extra observations are taken from it.

    Args:
        df: Object whose ``len`` is the number of observations (only the
            length is used).
        group_sizes: Array-like with the number of observations per group;
            must sum to ``len(df)``. It is not modified.
        test_size: Target fraction of observations, strictly between 0 and 1.
        random_state: Seed for NumPy's global random generator.

    Returns:
        ndarray: Number of observations chosen from each group (all >= 1).
        If the target is smaller than the number of groups, one observation
        per group is still chosen.

    Raises:
        AssertionError: If the inputs violate the checks below.
        ValueError: If the target cannot be reached with the given group sizes.

    NOTE(review): only the counts are returned; which rows they refer to
    (presumably the last observations of each group) is decided by the
    consumer -- confirm against the training code.
    '''
    group_sizes = np.asarray(group_sizes)

    assert 0 < test_size < 1, "Test size must be strictly between 0 and 1"
    assert np.sum(group_sizes) == len(df), "Sum of group_sizes must be equal to length of dataframe"
    # Element-wise check: `group_sizes.all() > 0` would only reject zeros
    assert (group_sizes > 0).all(), "Group sizes should be positive"
    assert len(group_sizes) < len(df), "Number of groups should be less than number of observations"

    test_len = int(test_size * len(df))
    no_groups = len(group_sizes)

    # A group of size s can give 1 (mandatory) + max(s - 2, 0) (optional)
    # observations. Beyond that total the sampling loop could never terminate.
    max_test_len = no_groups + int(np.maximum(group_sizes - 2, 0).sum())
    if test_len > max_test_len:
        raise ValueError(
            f'Cannot select {test_len} test observations: at most '
            f'{max_test_len} are available with these group sizes')

    np.random.seed(random_state)

    # Pick one observation from all groups
    sample_len = no_groups
    n_samples_chosen_per_group = np.ones_like(group_sizes)
    remaining = group_sizes - 1  # new array; observations still unpicked per group

    # Keep picking more observations until the required number has been reached
    while sample_len < test_len:
        group_idx = np.random.randint(no_groups)  # Pick a random group
        if remaining[group_idx] > 1:
            if test_len - sample_len > 1:
                # Random sample size 1 <= n < min(remaining in group, still needed)
                n = np.random.randint(1, min(remaining[group_idx], test_len - sample_len))
            else:
                n = 1
            n_samples_chosen_per_group[group_idx] += n
            remaining[group_idx] -= n
            sample_len += n

    return n_samples_chosen_per_group

In [None]:
def train_test_split_grouped_extrapolation(df, groups, test_size=0.2, random_state=1):
    '''
    Train/test split for a dataframe in which the test set contains only unseen groups.

    The split is the first one produced by ``GroupShuffleSplit``; ``test_size``
    is the proportion of groups to include in the test split (rounded up).

    Returns:
        tuple: ``(df_train, df_test)`` selected positionally from ``df``.
    '''
    splitter = GroupShuffleSplit(test_size=test_size, random_state=random_state)
    train_rows, test_rows = next(splitter.split(df, groups=groups))

    return df.iloc[train_rows], df.iloc[test_rows]

# Experiment 1: Friedman 3 Function with Temporal Shared GP

In [None]:
# Generate data
n = 1000         # Number of observations
m = 50           # Number of groups
p = n // m       # Number of observations per group
n_datasets = 20  # Number of train/test datasets
n_valid = 5      # Number of validation datasets
# NOTE: `datasets` rebinds the `sklearn.datasets` alias imported at the top of the notebook.
datasets, validation_datasets = generate_datasets(
    n, m, p, n_datasets, n_valid,
    func='make_friedman3',
    random_state=60,
)

## MLP

In [None]:
# Model settings handed to the DME training routines
model_config = {
    'input_dim': 4,
    'hidden_dim': 20,
    'output_dim': 1,
}

# Training settings handed to the DME training routines
train_config = {
    'n_epochs': 20,
    'lr': 0.001,
    'n_adapt': 2,
    'inner_lr': 0.1,
    'l2_penalty': 0.001,
}

# Validation run is disabled; uncomment to (re)compute `val_result`.
# val_result = param_search(validation_datasets['dataframes'], model_config, train_config,
#                           random_effects_column_names=['times'],
#                           group_column_name='group', y_column_name='y',
#                           n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
#                           model_type='MLP')

# print(val_result)

In [None]:
# NOTE: in the visible notebook `val_result` is only assigned by the param_search call
# that is commented out in the preceding cell, so on a fresh kernel this raises NameError.
print(val_result)

In [None]:
# Train/test the MLP on every dataset and keep the summary statistics
(test1_mean, test1_std,
 test2_mean, test2_std,
 F_mean, F_std) = test(
    datasets['dataframes'],
    model_config,
    train_config,
    random_effects_column_names=['times'],
    group_column_name='group',
    y_column_name='y',
    n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
    model_type='MLP',
)

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=2.088 and loss=28.789
[Test (Interpolation)] RMSE=1.965 and loss=46.679


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.960 and loss=30.057
[Test (Interpolation)] RMSE=1.725 and loss=43.149


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.931 and loss=34.921
[Test (Interpolation)] RMSE=1.599 and loss=45.508


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.962 and loss=33.639
[Test (Interpolation)] RMSE=1.851 and loss=46.596


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.981 and loss=31.240
[Test (Interpolation)] RMSE=1.919 and loss=44.418


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.735 and loss=30.611
[Test (Interpolation)] RMSE=1.886 and loss=43.598


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.847 and loss=33.423
[Test (Interpolation)] RMSE=1.862 and loss=45.118


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.659 and loss=29.421
[Test (Interpolation)] RMSE=1.631 and loss=48.067


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.921 and loss=32.363
[Test (Interpolation)] RMSE=1.850 and loss=48.052


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.629 and loss=32.823
[Test (Interpolation)] RMSE=1.778 and loss=47.700


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=2.235 and loss=30.423
[Test (Interpolation)] RMSE=1.593 and loss=42.248


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.662 and loss=32.167
[Test (Interpolation)] RMSE=1.734 and loss=45.947


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.935 and loss=32.201
[Test (Interpolation)] RMSE=1.718 and loss=46.738


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.677 and loss=32.601
[Test (Interpolation)] RMSE=1.939 and loss=46.343


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=2.032 and loss=36.327
[Test (Interpolation)] RMSE=1.890 and loss=48.421


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=2.015 and loss=37.617
[Test (Interpolation)] RMSE=1.819 and loss=46.190


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.600 and loss=32.988
[Test (Interpolation)] RMSE=1.839 and loss=49.569


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.406 and loss=30.879
[Test (Interpolation)] RMSE=1.638 and loss=44.035


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.938 and loss=33.291
[Test (Interpolation)] RMSE=1.981 and loss=51.868


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


[Test (Extrapolation)] RMSE=1.727 and loss=34.736
[Test (Interpolation)] RMSE=1.826 and loss=46.504

Extrapolation
Mean:  1.8470439021672622
Std:  0.19650450645854264


Interpolation
Mean:  1.8022884989435002
Std:  0.11746959708331613


F
Mean:  0.8928573491529477
Std:  0.09050752483344789


In [None]:
# Report the summary statistics returned by test() (one item per output line)
print(
    'Extrapolation',
    'Mean:  ' + str(test1_mean),
    'Std:  ' + str(test1_std),
    '\n',
    'Interpolation',
    'Mean:  ' + str(test2_mean),
    'Std:  ' + str(test2_std),
    '\n',
    'F',
    'Mean:  ' + str(F_mean),
    'Std:  ' + str(F_std),
    sep='\n',
)

Extrapolation
Mean:  1.8447034882588624
Std:  0.19108703741809022


Interpolation
Mean:  1.794005695357486
Std:  0.11338433247485936


F
Mean:  0.8923388156361856
Std:  0.08536524283251234


## RNN

In [None]:
# Model settings handed to the DME training routines
model_config = {
    'input_dim': 4,
    'hidden_dim': 20,
    'output_dim': 1,
}

# Training settings handed to the DME training routines
train_config = {
    'n_epochs': 10,
    'lr': 0.01,
    'n_adapt': 2,
    'inner_lr': 0.1,
    'l2_penalty': 0.001,
}

# Validation run is disabled; uncomment to (re)compute `val_result`.
# val_result = param_search(validation_datasets['dataframes'], model_config, train_config,
#                           random_effects_column_names=['times'],
#                           group_column_name='group', y_column_name='y',
#                           n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
#                           model_type='LSTM')

# print(val_result)

In [None]:
# NOTE: in the visible notebook `val_result` is only assigned by the param_search call
# that is commented out in the preceding cell, so on a fresh kernel this raises NameError.
print(val_result)

In [None]:
# Train/test the LSTM on every dataset and keep the summary statistics
(test1_mean, test1_std,
 test2_mean, test2_std,
 F_mean, F_std) = test(
    datasets['dataframes'],
    model_config,
    train_config,
    random_effects_column_names=['times'],
    group_column_name='group',
    y_column_name='y',
    n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
    model_type='LSTM',
)

In [None]:
# Report the summary statistics returned by test() (one item per output line)
print(
    'Extrapolation',
    'Mean:  ' + str(test1_mean),
    'Std:  ' + str(test1_std),
    '\n',
    'Interpolation',
    'Mean:  ' + str(test2_mean),
    'Std:  ' + str(test2_std),
    '\n',
    'F',
    'Mean:  ' + str(F_mean),
    'Std:  ' + str(F_std),
    sep='\n',
)

Extrapolation
Mean:  1.9430961547980725
Std:  0.23743332689318258


Interpolation
Mean:  1.9283003302247994
Std:  0.1511101017986711


F
Mean:  1.2107607878822475
Std:  0.12928396752799812


# Experiment 2: Friedman 3 Function with Temporal Individual GP

In [None]:
# Generate data
n = 1000         # Number of observations
m = 50           # Number of groups
p = n // m       # Number of observations per group
n_datasets = 20  # Number of train/test datasets
n_valid = 5      # Number of validation datasets
# NOTE: `datasets` rebinds the `sklearn.datasets` alias imported at the top of the notebook.
datasets, validation_datasets = generate_datasets(
    n, m, p, n_datasets, n_valid,
    func='make_friedman3',
    shared_gp=False,
    random_state=75,
)

## MLP

In [None]:
# Model settings handed to the DME training routines
model_config = {
    'input_dim': 4,
    'hidden_dim': 100,
    'output_dim': 1,
}

# Training settings handed to the DME training routines
train_config = {
    'n_epochs': 20,
    'lr': 0.005,
    'n_adapt': 2,
    'inner_lr': 0.1,
    'l2_penalty': 0.001,
}

# Validation run is disabled; uncomment to (re)compute `val_result`.
# val_result = param_search(validation_datasets['dataframes'], model_config, train_config,
#                           random_effects_column_names=['times'],
#                           group_column_name='group', y_column_name='y',
#                           n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
#                           model_type='MLP')

# print(val_result)

In [None]:
# NOTE: in the visible notebook `val_result` is only assigned by the param_search call
# that is commented out in the preceding cell, so on a fresh kernel this raises NameError.
print(val_result)

4.821257044677564


In [None]:
# Train/test the MLP on every dataset and keep the summary statistics
(test1_mean, test1_std,
 test2_mean, test2_std,
 F_mean, F_std) = test(
    datasets['dataframes'],
    model_config,
    train_config,
    random_effects_column_names=['times'],
    group_column_name='group',
    y_column_name='y',
    n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
    model_type='MLP',
)

In [None]:
# Report the summary statistics returned by test() (one item per output line)
print(
    'Extrapolation',
    'Mean:  ' + str(test1_mean),
    'Std:  ' + str(test1_std),
    '\n',
    'Interpolation',
    'Mean:  ' + str(test2_mean),
    'Std:  ' + str(test2_std),
    '\n',
    'F',
    'Mean:  ' + str(F_mean),
    'Std:  ' + str(F_std),
    sep='\n',
)

Extrapolation
Mean:  7.380636263514949
Std:  2.7219344255093914


Interpolation
Mean:  8.19656902792111
Std:  4.18327649052143


F
Mean:  1.0923493831100162
Std:  0.22262280923828884


## RNN

In [None]:
# Model settings handed to the DME training routines
model_config = {
    'input_dim': 4,
    'hidden_dim': 100,
    'output_dim': 1,
}

# Training settings handed to the DME training routines
train_config = {
    'n_epochs': 20,
    'lr': 0.01,
    'n_adapt': 2,
    'inner_lr': 0.1,
    'l2_penalty': 0.001,
}

# Validation run is disabled; uncomment to (re)compute `val_result`.
# val_result = param_search(validation_datasets['dataframes'], model_config, train_config,
#                           random_effects_column_names=['times'],
#                           group_column_name='group', y_column_name='y',
#                           n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
#                           model_type='LSTM')

# print(val_result)

In [None]:
# NOTE: in the visible notebook `val_result` is only assigned by the param_search call
# that is commented out in the preceding cell, so on a fresh kernel this raises NameError.
print(val_result)

4.852100091455474


In [None]:
# Train/test the LSTM on every dataset and keep the summary statistics
(test1_mean, test1_std,
 test2_mean, test2_std,
 F_mean, F_std) = test(
    datasets['dataframes'],
    model_config,
    train_config,
    random_effects_column_names=['times'],
    group_column_name='group',
    y_column_name='y',
    n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
    model_type='LSTM',
)

In [None]:
# Report the summary statistics returned by test() (one item per output line)
print(
    'Extrapolation',
    'Mean:  ' + str(test1_mean),
    'Std:  ' + str(test1_std),
    '\n',
    'Interpolation',
    'Mean:  ' + str(test2_mean),
    'Std:  ' + str(test2_std),
    '\n',
    'F',
    'Mean:  ' + str(F_mean),
    'Std:  ' + str(F_std),
    sep='\n',
)

Extrapolation
Mean:  7.395978922721264
Std:  2.719650446601197


Interpolation
Mean:  8.190454477459078
Std:  4.1982947082181346


F
Mean:  1.216642040096025
Std:  0.23131919213913554


# Experiment 3: Linear Function with Temporal Shared GP

In [None]:
# Generate data
n = 1000         # Number of observations
m = 50           # Number of groups
p = n // m       # Number of observations per group
n_datasets = 20  # Number of train/test datasets
n_valid = 5      # Number of validation datasets
# NOTE: `datasets` rebinds the `sklearn.datasets` alias imported at the top of the notebook.
datasets, validation_datasets = generate_datasets(
    n, m, p, n_datasets, n_valid,
    func='linear_func',
    random_state=60,
)

## MLP

In [None]:
# Model settings handed to the DME training routines
model_config = {
    'input_dim': 4,
    'hidden_dim': 20,
    'output_dim': 1,
}

# Training settings handed to the DME training routines
train_config = {
    'n_epochs': 15,
    'lr': 0.001,
    'n_adapt': 2,
    'inner_lr': 0.1,
    'l2_penalty': 0.001,
}

# Validation run is disabled; uncomment to (re)compute `val_result`.
# val_result = param_search(validation_datasets['dataframes'], model_config, train_config,
#                           random_effects_column_names=['times'],
#                           group_column_name='group', y_column_name='y',
#                           n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
#                           model_type='MLP')

# print(val_result)

In [None]:
# NOTE: in the visible notebook `val_result` is only assigned by the param_search call
# that is commented out in the preceding cell, so on a fresh kernel this raises NameError.
print(val_result)

1.6005159741630943


In [None]:
# Train/test the MLP on every dataset and keep the summary statistics
(test1_mean, test1_std,
 test2_mean, test2_std,
 F_mean, F_std) = test(
    datasets['dataframes'],
    model_config,
    train_config,
    random_effects_column_names=['times'],
    group_column_name='group',
    y_column_name='y',
    n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
    model_type='MLP',
)

In [None]:
# Report the summary statistics returned by test() (one item per output line)
print(
    'Extrapolation',
    'Mean:  ' + str(test1_mean),
    'Std:  ' + str(test1_std),
    '\n',
    'Interpolation',
    'Mean:  ' + str(test2_mean),
    'Std:  ' + str(test2_std),
    '\n',
    'F',
    'Mean:  ' + str(F_mean),
    'Std:  ' + str(F_std),
    sep='\n',
)

Extrapolation
Mean:  1.6149160531213957
Std:  0.15618772975939785


Interpolation
Mean:  1.5287743377945733
Std:  0.127347176465175


F
Mean:  0.45677609070681857
Std:  0.03791896279041083


## RNN

In [None]:
# Model settings handed to the DME training routines
model_config = {
    'input_dim': 4,
    'hidden_dim': 20,
    'output_dim': 1,
}

# Training settings handed to the DME training routines
train_config = {
    'n_epochs': 15,
    'lr': 0.01,
    'n_adapt': 2,
    'inner_lr': 0.1,
    'l2_penalty': 0.001,
}

# Validation run is disabled; uncomment to (re)compute `val_result`.
# val_result = param_search(validation_datasets['dataframes'], model_config, train_config,
#                           random_effects_column_names=['times'],
#                           group_column_name='group', y_column_name='y',
#                           n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
#                           model_type='LSTM')

# print(val_result)

In [None]:
# NOTE: in the visible notebook `val_result` is only assigned by the param_search call
# that is commented out in the preceding cell, so on a fresh kernel this raises NameError.
print(val_result)

1.5656870395772609


In [None]:
# Train/test the LSTM on every dataset and keep the summary statistics
(test1_mean, test1_std,
 test2_mean, test2_std,
 F_mean, F_std) = test(
    datasets['dataframes'],
    model_config,
    train_config,
    random_effects_column_names=['times'],
    group_column_name='group',
    y_column_name='y',
    n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
    model_type='LSTM',
)

In [None]:
# Report the summary statistics returned by test() (one item per output line)
print(
    'Extrapolation',
    'Mean:  ' + str(test1_mean),
    'Std:  ' + str(test1_std),
    '\n',
    'Interpolation',
    'Mean:  ' + str(test2_mean),
    'Std:  ' + str(test2_std),
    '\n',
    'F',
    'Mean:  ' + str(F_mean),
    'Std:  ' + str(F_std),
    sep='\n',
)

Extrapolation
Mean:  1.6057474921594834
Std:  0.17254362471900006


Interpolation
Mean:  1.5226190798350194
Std:  0.1381481663932315


F
Mean:  0.3921212515412324
Std:  0.1831173929132119


# Experiment 4: Linear Function with Temporal Individual GP

In [None]:
# Generate data
n = 1000         # Number of observations
m = 50           # Number of groups
p = n // m       # Number of observations per group
n_datasets = 20  # Number of train/test datasets
n_valid = 5      # Number of validation datasets
# NOTE: `datasets` rebinds the `sklearn.datasets` alias imported at the top of the notebook.
datasets, validation_datasets = generate_datasets(
    n, m, p, n_datasets, n_valid,
    func='linear_func',
    shared_gp=False,
    random_state=40,
)

## MLP

In [None]:
# Model settings handed to the DME training routines
model_config = {
    'input_dim': 4,
    'hidden_dim': 100,
    'output_dim': 1,
}

# Training settings handed to the DME training routines
train_config = {
    'n_epochs': 20,
    'lr': 0.01,
    'n_adapt': 2,
    'inner_lr': 0.1,
    'l2_penalty': 0.001,
}

# Validation run is disabled; uncomment to (re)compute `val_result`.
# val_result = param_search(validation_datasets['dataframes'], model_config, train_config,
#                           random_effects_column_names=['times'],
#                           group_column_name='group', y_column_name='y',
#                           n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
#                           model_type='MLP')
# print(val_result)

In [None]:
# NOTE: in the visible notebook `val_result` is only assigned by the param_search call
# that is commented out in the preceding cell, so on a fresh kernel this raises NameError.
print(val_result)

7.03313279163515


In [None]:
# Train/test the MLP on every dataset and keep the summary statistics
(test1_mean, test1_std,
 test2_mean, test2_std,
 F_mean, F_std) = test(
    datasets['dataframes'],
    model_config,
    train_config,
    random_effects_column_names=['times'],
    group_column_name='group',
    y_column_name='y',
    n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
    model_type='MLP',
)

In [None]:
# Report the summary statistics returned by test() (one item per output line)
print(
    'Extrapolation',
    'Mean:  ' + str(test1_mean),
    'Std:  ' + str(test1_std),
    '\n',
    'Interpolation',
    'Mean:  ' + str(test2_mean),
    'Std:  ' + str(test2_std),
    '\n',
    'F',
    'Mean:  ' + str(F_mean),
    'Std:  ' + str(F_std),
    sep='\n',
)

Extrapolation
Mean:  5.630500108506405
Std:  1.8809425532926864


Interpolation
Mean:  6.97632089358054
Std:  4.341562186029008


F
Mean:  0.7174793200806473
Std:  0.26452146922937897


## RNN

In [None]:
# Model settings handed to the DME training routines
model_config = {
    'input_dim': 4,
    'hidden_dim': 50,
    'output_dim': 1,
}

# Training settings handed to the DME training routines
train_config = {
    'n_epochs': 10,
    'lr': 0.01,
    'n_adapt': 2,
    'inner_lr': 0.1,
    'l2_penalty': 0.001,
}

# Validation run is disabled; uncomment to (re)compute `val_result`.
# val_result = param_search(validation_datasets['dataframes'], model_config, train_config,
#                           random_effects_column_names=['times'],
#                           group_column_name='group', y_column_name='y',
#                           n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
#                           model_type='LSTM')

# print(val_result)

In [None]:
# NOTE: in the visible notebook `val_result` is only assigned by the param_search call
# that is commented out in the preceding cell, so on a fresh kernel this raises NameError.
print(val_result)

7.519978710254309


In [None]:
# Train/test the LSTM on every dataset and keep the summary statistics
(test1_mean, test1_std,
 test2_mean, test2_std,
 F_mean, F_std) = test(
    datasets['dataframes'],
    model_config,
    train_config,
    random_effects_column_names=['times'],
    group_column_name='group',
    y_column_name='y',
    n_samples_chosen_per_group_list=datasets['n_samples_chosen_per_group'],
    model_type='LSTM',
)

In [None]:
# Report the summary statistics returned by test() (one item per output line)
print(
    'Extrapolation',
    'Mean:  ' + str(test1_mean),
    'Std:  ' + str(test1_std),
    '\n',
    'Interpolation',
    'Mean:  ' + str(test2_mean),
    'Std:  ' + str(test2_std),
    '\n',
    'F',
    'Mean:  ' + str(F_mean),
    'Std:  ' + str(F_std),
    sep='\n',
)

Extrapolation
Mean:  5.730281490367273
Std:  1.9782859830486084


Interpolation
Mean:  7.045151985869337
Std:  4.3136252085097


F
Mean:  1.1095539011118505
Std:  0.7157333099158796
