In [None]:
import numpy as np
from dataclasses import dataclass
from datetime import datetime
import json
import os
import sys
# Put the application folder on the import path (placeholder path; presumably
# where the local mlp_fxns / plot_fxns modules live -- adjust per machine).
sys.path[1:1] = ['/path/to/application/app/folder']

# Timestamps used later to label saved outputs:
#   today_date -> date object embedded in output file names
#   time_now   -> human-readable ctime() string written into the run history
today_date = datetime.today().date()
time_now = datetime.today().ctime()

# packages for plotting
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
import urllib.request

# Plot styling: register the IBM Plex Mono font with matplotlib and set the
# notebook-wide rcParams.
#
# The font file is fetched only when it is not already present next to the
# notebook, so re-running the notebook does not need network access after the
# first run. The download goes to a temporary name and is renamed once it has
# finished, so an interrupted download is never mistaken for a complete font.
font_url = 'https://github.com/google/fonts/raw/main/ofl/ibmplexmono/IBMPlexMono-Regular.ttf'
font_path = 'IBMPlexMono-Regular.ttf'
if not os.path.exists(font_path):
    partial_font_path = font_path + '.part'
    urllib.request.urlretrieve(font_url, partial_font_path)
    os.replace(partial_font_path, font_path)

# Make the font available to matplotlib under the family name 'plexmono'.
fe = font_manager.FontEntry(
    fname=font_path,
    name='plexmono')
font_manager.fontManager.ttflist.append(fe)

plt.rcParams.update({
    # colours
    'axes.facecolor': '#f5f4e9',
    'grid.color': '#AAAAAA',
    'axes.edgecolor': '#333333',
    'figure.facecolor': '#FFFFFF',
    'axes.prop_cycle': plt.cycler('color', plt.cm.Dark2.colors),
    # grid is off by default; individual figures turn it on explicitly
    'axes.grid': False,
    # text
    'font.family': fe.name,
    # geometry / resolution
    'figure.figsize': (3.5, 3.5 / 1.2),
    'figure.dpi': 300,
    # ticks
    'ytick.left': True,
    'xtick.bottom': True,
})

In [None]:
# How the data is divided into sensitive vs. non-sensitive parts.
# Choose EITHER an absolute threshold OR a fractional split.
censor_split = 0.1             # 10% sensitive data, 90% non-sensitive data
censor_threshold = None        # no absolute threshold chosen; the split above is used
censor_region = 'above'        # presumably: which side of the cut is sensitive -- confirm in mlp_fxns

In [None]:
@dataclass
class Config:
    """Hyperparameters handed to the MLP experiment as ``model_config``.

    All fields have defaults, so ``Config()`` gives the baseline setup.
    """

    # --- model ---
    Din: int = 50                 # dim of features
    hidden_dim: int = 64

    # --- data ---
    batchsize: int = 32
    datasize: int = 6400
    split: float = 0.1            # 10/10/80 test val train

    # --- optimisation ---
    epochs: int = 60
    lr: float = 0.001

    # --- early stopping ---
    patience: int = 5             # presumably the early-stopping patience -- confirm in mlp_fxns
    min_delta: float = 1e-4       # for early stopping

# Baseline hyperparameters and the base random seed (each trial offsets it).
seed = 511
config = Config()

# Output locations. The results folder name encodes the censoring setup so
# runs with different split / region settings do not overwrite each other.
fig_dir_name = 'OUTPUTS/figures/'
dir_name = f'OUTPUTS/all_results/mlp_ynoise_results_split{censor_split}_{censor_region}'
for output_dir in (dir_name, fig_dir_name):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# y-noise sweep: 11 evenly spaced levels from 0 to 10 inclusive (step 1).
# The count is stated explicitly; it was previously derived as int(2/0.2+1),
# which only equals 11 because the float division happens to round to 10.0
# and has no relation to the 0-10 range actually swept.
y_noise_min, y_noise_max = 0, 10
num_y_noise_levels = 11
y_noise_levels = np.linspace(y_noise_min, y_noise_max, num_y_noise_levels)

# One task tuple per noise level. The name string is also the key used later
# to look up per-task errors, so it must be rebuilt with the same f-string.
# Tuple layout: (name, 0, y noise level, False, None) -- presumably
# (name, x noise level, y noise level, omit flag, <unused>); confirm against
# mlptask_wrapper.
y_tasks = []
for y_level in y_noise_levels:
    task = (f"y noise level {y_level}", 0, y_level, False, None)
    print(task)
    y_tasks.append(task)

len(y_tasks)

In [None]:
from mlp_fxns import mlptask_wrapper

# Run the full y-noise task sweep num_trials times, one seed per trial
# (seed, seed+1, ...), and keep every trial's result dict.
num_trials = 5
all_trials_results = []
for trial in range(num_trials):
    trial_seed = seed + trial
    # \033[46m ... \033[0m: ANSI escape codes that highlight the trial header.
    print(f'\033[46mTrial {trial+1}, Seed: {trial_seed}\033[0m')
    results = mlptask_wrapper(
        trial_seed,
        y_tasks,
        censor_region,
        censor_split,
        model_config=config,
        verbose=True,
        sanitycheckplot=False
    )
    all_trials_results.append(results)

# Append this run to the history log (best effort: a failure is reported but
# does not stop the notebook). The file is opened in append mode and each run
# is preceded by a plain-text header line, so history.json as a whole is a
# log of JSON blocks rather than a single JSON document.
try:
    # Serialize BEFORE touching the file: if the results contain something
    # json cannot encode, nothing is written, instead of leaving a header
    # followed by truncated JSON in the history file.
    history_json = json.dumps(all_trials_results, indent=4)
    with open(f'{dir_name}/history.json','a') as f:
        f.write(f'\nRun from today: {time_now}\n')
        f.write(history_json)
except Exception as e:
    template = "An exception of type {0} occurred. Arguments:\n{1!r}"
    message = template.format(type(e).__name__, e.args)
    print(message)

In [None]:
from plot_fxns import plot_trainingcurves, plot_parityplots

# Visual check of the most recent trial only, limited to the first four
# noise levels: training curves, then parity plots with that trial's
# censor threshold.
last_trial_results = all_trials_results[-1]
plot_trainingcurves(y_tasks[:4], last_trial_results)
plot_parityplots(
    y_tasks[:4],
    last_trial_results,
    threshold=last_trial_results['censor_threshold'],
)

In [None]:
# Across-trial mean and std of the test errors, one entry per y-noise level.
# Three error kinds are tracked: 'overall', 'lower' and 'upper'.
# np.std is called with its default ddof (0), i.e. the population std.
error_regions = ('overall', 'lower', 'upper')

aggregated_results = {
    'x_noise_level': 0,
    'y_noise_level': list(y_noise_levels),
    'omit': False,
}
for region in error_regions:
    aggregated_results[f'{region}_error_mean'] = []
    aggregated_results[f'{region}_error_std'] = []

for noise in y_noise_levels:
    # Must match the task name built when the tasks were created.
    task_key = f"y noise level {noise}"
    for region in error_regions:
        per_trial_errors = [
            trial_result[f'{region}_error'][task_key]
            for trial_result in all_trials_results
        ]
        aggregated_results[f'{region}_error_mean'].append(np.mean(per_trial_errors))
        aggregated_results[f'{region}_error_std'].append(np.std(per_trial_errors))

In [None]:
# Sanity check: list the censor threshold recorded by each trial.
# The generated data is different whenever the seed changes, so the
# per-trial values are printed side by side for comparison.

censor_thresholds = [
    trial_result['censor_threshold'] for trial_result in all_trials_results
]
print(censor_thresholds)

In [None]:
from plot_fxns import create_dataframe

# Turn the aggregated errors into a table, save it as a dated JSON summary
# in the results folder, and display it.
summary_path = f'{dir_name}/summary_{today_date}.json'
df = create_dataframe(aggregated_results)
df.to_json(summary_path)
df

In [None]:
# Figure 1: test RMSE vs. y noise level with error bars (+/- std over trials).
# To re-plot from a saved summary instead of recomputing (needs pandas as pd):
# df = pd.read_json(f'{dir_name}/summary_{today_date}.json')

fig, ax = plt.subplots(figsize=(4,4))

# TODO: Add omission method to compare. Make the lines dashed. Add 'omission' text nearby (or include it in legends)
# Sketch (main_results is not defined in this notebook yet):
# omit_s0 = main_results['lower_error']['omission']
# omit_s1 = main_results['upper_error']['omission']
# ax.axhline(y=omit_s0, xmin=0, xmax=2, alpha=0.7)
# ax.axhline(y=omit_s1, xmin=0, xmax=2, c='C1', alpha=0.7)

# (mean column, std column, legend label); sensitive data is drawn first.
errorbar_series = (
    ('s=1 RMSE', 's=1 RMSE std', 'Sensitive data'),
    ('s=0 RMSE', 's=0 RMSE std', 'Non-sensitive data'),
)
for mean_col, std_col, series_label in errorbar_series:
    ax.errorbar(df['y noise'], df[mean_col], yerr=df[std_col],
                marker='o', label=series_label, capsize=5)

ax.set_title('Test Errors')
ax.set_xlabel('y noise level')
ax.set_ylabel('RMSE')
ax.legend()
ax.set_ylim(0,1.8)
ax.grid(True)
fig.tight_layout()
fig.savefig(f'{fig_dir_name}/mlp_testerror_ynoise_split{censor_split}_{censor_region}_{today_date}.png',dpi=300)
plt.show()

In [None]:
# Figure 2 (v2): same data as the error-bar figure, drawn as mean lines with a
# shaded +/- one std band. Uses the default figure size from rcParams.
# To re-plot from a saved summary instead of recomputing (needs pandas as pd):
# df = pd.read_json(f'{dir_name}/summary_{today_date}.json')

fig, ax = plt.subplots()

# (mean column, std column, legend label); line first, then its band, so the
# colour cycle advances in the same order for both series.
band_series = (
    ('s=1 RMSE', 's=1 RMSE std', 'Sensitive data'),
    ('s=0 RMSE', 's=0 RMSE std', 'Non-sensitive data'),
)
for mean_col, std_col, series_label in band_series:
    ax.plot(df['y noise'], df[mean_col], label=series_label, marker='o', markersize=2)
    ax.fill_between(df['y noise'], df[mean_col] - df[std_col], df[mean_col] + df[std_col], alpha=0.2)

ax.set_title('Test Errors')
ax.set_xlabel('y noise level')
ax.set_ylabel('RMSE')
ax.set_ylim(0,1.8)
ax.set_xlim(0,10)
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig(f'{fig_dir_name}/mlp_testerror_ynoise_split{censor_split}_{censor_region}_{today_date}_v2.png',dpi=300)
plt.show()