In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Root folder that holds one sub-directory per experiment run.
exps_dir = '../experiments'

# Run directories grouped by method. Note that 'agem' and 'basic' use run
# ids 2..11 while 'ewc' and 'mas' use run ids 1..10 (ten runs each).
exps = {
    'agem': [f'agem_cub-{i:05d}' for i in range(2, 12)],
    'basic': [f'basic_cub-{i:05d}' for i in range(2, 12)],
    'ewc': [f'ewc_cub-{i:05d}' for i in range(1, 11)],
    'mas': [f'mas_cub-{i:05d}' for i in range(1, 11)]
}


def load_per_run_arrays(filename):
    """Load `custom_data/<filename>` from every run directory of every method.

    Returns a dict keyed like `exps`; each value is `np.array` applied to the
    list of per-run arrays, in the same order as the run directories.
    """
    return {
        method: np.array([np.load(f'{exps_dir}/{run_dir}/custom_data/{filename}') for run_dir in exps[method]])
        for method in exps
    }


accs_histories = load_per_run_arrays('accs_history.npy')
zst_accs = load_per_run_arrays('zst_accs.npy')
# Raw contents of test_acc_batch_histories.npy; the name is kept because the
# LCA reporting cell reads `lca_scores`.
lca_scores = load_per_run_arrays('test_acc_batch_histories.npy')

# Plot the ZST accuracy of every method, averaged over runs (axis 0 of each
# `zst_accs` array), against the 1-based task id.

# Legend label for each key of `zst_accs`, listed in plotting order.
method_labels = {
    'basic': 'Basic',
    'agem': 'A-GEM',
    'ewc': 'EWC',
    'mas': 'MAS',
}

fig, ax = plt.subplots(figsize=(15, 7))
ax.set_title('ZST accuracy on CUB')

# Derive the number of tasks from the loaded data instead of hard-coding it,
# so the x axis always matches the length of the curves being drawn.
max_tasks = 0
for key, label in method_labels.items():
    mean_accs = zst_accs[key].mean(axis=0)
    task_ids = np.arange(1, len(mean_accs) + 1)
    ax.plot(task_ids, mean_accs, label=label)
    max_tasks = max(max_tasks, len(mean_accs))

ax.set_xlabel('Task ID')
ax.set_ylabel('ZST accuracy for a task')
ax.set_xticks(np.arange(1, max_tasks + 1))
ax.legend()
ax.grid()

In [12]:
import sys; sys.path.append('..')
from src.utils.metrics import compute_average_accuracy, compute_forgetting_measure, compute_learning_curve_area

# Print, per method, the mean over runs of compute_learning_curve_area
# applied to each run's entry in `lca_scores`.
for key in ('basic', 'ewc', 'mas', 'agem'):
    per_run_lca = [compute_learning_curve_area(run_scores) for run_scores in lca_scores[key]]
    mean_lca = np.mean(per_run_lca)
    print(f'Average [LCA10] score for {key}: {mean_lca:.5f}')

Average [LCA10] score for basic: 0.58790
Average [LCA10] score for ewc: 0.58936
Average [LCA10] score for mas: 0.59646
Average [LCA10] score for agem: 0.60620


In [10]:
# Print, per method, the mean over runs of compute_average_accuracy applied
# to each run's accuracy history, scaled by 100 for display.
for key in ('basic', 'ewc', 'mas', 'agem'):
    per_run_accuracy = [compute_average_accuracy(run_history) for run_history in accs_histories[key]]
    mean_accuracy = np.mean(per_run_accuracy)
    print(f'Average [Average Accuracy] score for {key}: {100 * mean_accuracy:.3f}')

Average [Average Accuracy] score for basic: 68.838
Average [Average Accuracy] score for ewc: 69.455
Average [Average Accuracy] score for mas: 70.674
Average [Average Accuracy] score for agem: 72.916


In [7]:
# Print, per method, the mean over runs of compute_forgetting_measure
# applied to each run's accuracy history.
for key in ('basic', 'ewc', 'mas', 'agem'):
    per_run_forgetting = [compute_forgetting_measure(run_history) for run_history in accs_histories[key]]
    mean_forgetting = np.mean(per_run_forgetting)
    print(f'Average [Forgetting Measure] score for {key}: {mean_forgetting:.5f}')

Average [Forgetting Measure] score for basic: 0.11280
Average [Forgetting Measure] score for ewc: 0.10755
Average [Forgetting Measure] score for mas: 0.09701
Average [Forgetting Measure] score for agem: 0.07884
