# Create arrays for ResNet-18 on ImageNet

In [1]:
# Imports
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scripts.analysis_helpers import load_data, prep_condition, order_econs
from scripts.analysis_helpers import main_plot, three_plots, heat_plot, plot_decisions, plot_decisions_change, main_plot_both
from scripts.analysis_helpers import calc_econ
from scripts.data import init_dataset
import seaborn as sns
from sklearn.metrics import cohen_kappa_score

In [2]:
# Choose the base network for which the result arrays are built.
# Supported values:
#   "Res18"      -> Res18 on ImageNet
#   "Res18fc100" -> Res18 on Gaussian
#   "Res18CIFAR" -> Res18 on CIFAR-100
#   "Rebuttal"   -> SOTA on ImageNet
#   "VGG11"      -> VGG-11 on ImageNet
#   "Dense121"   -> DenseNet-121 on ImageNet
base_network = "Res18CIFAR"

# Flag: correct for label errors?
correct = False

# Flag: enable to make the superimp plot without the combined condition
remove_combined = False

# Result and figure directories belonging to the chosen base network
folder_name = "./results/" + base_network + "/"
figure_path = "./figures/" + base_network + "/"

# Every condition is named "<base_network>_<suffix>". The position of a
# condition in this list is later used as its index in the result arrays.
_condition_suffixes = (
    "Base_condition",
    "Plus_1ep",
    "Plus_10ep",
    "Different_optimizer",
    "Different_batchsize",
    "Different_initialisation",
    "Different_LR",
    "CUDA_nondeterministic",
    "Different_dataorder",
    "Different_architecture",
    "Different_data",
    "Half_data",
    "Combined_condition",
)
conditions = [f"{base_network}_{suffix}" for suffix in _condition_suffixes]

## Set necessary variables

In [3]:
# Get base array from "analysis_helpers.prep_condition".
# Only the first and last return values (base results, number of base epochs) are kept here.
base, _, _, _, num_base_epochs = prep_condition(f"{base_network}_Base_condition", folder_name, base_network)

# Load label error indices, file taken from https://github.com/cleanlab/cleanlab
# Column 2 of the file is read as a 0/1 flag per image (presumably 1 = label error; verify against the file).
# NOTE(review): allow_pickle=True unpickles arbitrary objects on load - only use with the trusted cleanlab file.
# `np.int` was merely a deprecated alias of the builtin `int` and was removed in NumPy 1.24,
# so the builtin is used directly; the resulting dtype is the same.
label_error = np.load("imagenet_val_ident.npy", allow_pickle=True)[:, 2].astype(int)

# Index tuples (as returned by np.where) of the entries flagged 1 and 0 respectively
error_inds = np.where(label_error == 1)
right_inds = np.where(label_error == 0)

## 1. Build main array: contains error consistencies to base model

In [4]:
# Build "main_array": the error consistency of every model of every condition to the base model.
# Layout: axis 0 = epoch (num_base_epochs rows), axis 1 = condition (same order as "conditions"),
# axis 2 = 6 slots: slot 0 holds the condition label string, slots 1.. hold one error consistency
# per model (written at index model+1, so at most 5 models per condition fit).
# dtype=object is needed because strings and floats share the array; unused entries stay NaN.
main_array = np.zeros((num_base_epochs, len(conditions), 6), dtype=object)
main_array[:] = np.nan

# Loop through conditions and fill in the error consistencies to the base model
for ind, condition in enumerate(conditions):

    # Load data, conditions with more epochs than base are stored in base file
    # Note: this rebinds the module-level "num_base_epochs" on every iteration
    results, val_acc, num_epochs, num_models, num_base_epochs = prep_condition(condition, folder_name, base_network)
    
    # Init array for error consistency with base network, one entry per (model, epoch) of this condition
    econ_tobase = np.zeros((num_models, num_epochs))
    
    # Loop through models and epochs to get error consistency for all epochs of all models of this condition
    for model in range(num_models):
            
        # Loop through epochs
        for epoch in range(num_epochs):

            # Print where we are
            print(f"Condition: {condition}, Model: {model}, Epoch: {epoch}")
            
            # All conditions except "Different_data" are compared to model 0 of the base condition.
            # calc_econ is called as (results_a, results_b, model_a, model_b, epoch_a, epoch_b, error_inds, correct)
            # - argument meaning inferred from the call sites here; TODO confirm against analysis_helpers.
            if condition.endswith("Different_data") == False:
                
                # For condition "Plus_1ep", epoch 2 is compared to epoch 1 of the base model
                # However, for epoch 92, epoch 91 does not exist for the base model
                # Therefore, it is then compared to the last epoch of the base model (num_base_epochs-1)
                # NOTE(review): epoch == num_base_epochs-1 already takes the else branch below, so for the
                # "Plus_*" conditions that epoch is compared to the last base epoch instead of the shifted
                # one (epoch-1 / epoch-10) - confirm this boundary is intended.
                if epoch < num_base_epochs-1:
                    
                    # For the "Plus_1ep" condition, we can only calculate the error consistency from epoch 1 (not 0)
                    if condition.endswith("Plus_1ep") and epoch > 0:
                        econ_tobase[model, epoch] = calc_econ(results, base, 
                                                              model, 0, 
                                                              epoch, epoch-1,
                                                              error_inds, correct)
                        
                    # For the "Plus_10ep" condition, we can only calculate the error consistency from epoch 10
                    elif condition.endswith("Plus_10ep") and epoch > 9:
                        econ_tobase[model, epoch] = calc_econ(results, base, 
                                                              model, 0, 
                                                              epoch, epoch-10,
                                                              error_inds, correct)
                        
                    # For all other conditions, use the same epoch index for base network and current condition
                    # (this branch also handles epoch 0 of "Plus_1ep" and epochs 0-9 of "Plus_10ep")
                    else:
                        econ_tobase[model, epoch] = calc_econ(results, base, 
                                                              model, 0, 
                                                              epoch, epoch,
                                                              error_inds, correct)
                        
                # Since epoch only exceeds "num_base_epochs-1" in the "Plus_1ep" and "Plus_10ep" conditions
                # We use this to compare the epochs above "num_base_epochs-1" with the last epoch of the base model 
                else:
                    econ_tobase[model, epoch] = calc_econ(results, base, 
                                                          model, 0, 
                                                          epoch, num_base_epochs-1,
                                                          error_inds, correct)


            # Only for different data compared condition
            else:
                
                # The two networks with completely separate data sets are only compared to each other:
                # model 0 against model 1 and model 1 against model 0.
                # Assumes this condition has exactly two models (1-model would go negative otherwise) - TODO confirm.
                econ_tobase[model, epoch] = calc_econ(results, results, 
                                                      0+model, 1-model, 
                                                      epoch, epoch,
                                                      error_inds, correct)
                    
            # Store the label (condition name without the base network prefix, "_" replaced by newlines)
            # and the error consistency in main_array; the "Plus_*" conditions are shifted so that
            # their row index lines up with the base epoch they were compared to.

            # For "Plus_1ep" condition, entry 0 is from epoch 1 and so on
            if condition.endswith("Plus_1ep") and epoch > 0:
                main_array[epoch-1, ind, 0] = condition.replace(base_network, "").replace("_", "\n")
                main_array[epoch-1, ind, model+1] = econ_tobase[model, epoch]

            # For "Plus_10ep" condition, entry 0 is from epoch 10 and so on
            elif condition.endswith("Plus_10ep") and epoch > 9:
                main_array[epoch-10, ind, 0] = condition.replace(base_network, "").replace("_", "\n")
                main_array[epoch-10, ind, model+1] = econ_tobase[model, epoch]

            # For all other conditions, just add the error consistencies from this epoch to the array
            # (the early "Plus_*" epochs also land here; their rows are overwritten by the shifted
            # writes above once the later epochs are processed)
            else:
                main_array[epoch, ind, 0] = condition.replace(base_network, "").replace("_", "\n")
                main_array[epoch, ind, model+1] = econ_tobase[model, epoch]
                
# Save array
np.save(folder_name + f"{base_network}_main_array", main_array)

Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 0
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 1
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 2
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 3
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 4
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 5
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 6
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 7
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 8
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 9
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 10
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 11
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 12
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 13
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 14
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 15
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 16
Condition: Res18CIFAR_Ba

Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 0
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 1
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 2
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 3
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 4
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 5
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 6
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 7
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 8
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 9
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 10
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 11
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 12
Condition: Res18CIFAR_Different_initialisation, Model: 0, Epoch: 13
Condition: Res18CIFAR_Different_initialisation, Model: 0, 

Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 14
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 15
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 16
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 17
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 18
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 19
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 20
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 21
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 22
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 23
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 24
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 25
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 26
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 27
Condition: Res18CIFAR_Different_initialisation, 

Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 16
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 17
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 18
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 19
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 20
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 21
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 22
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 23
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 24
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 25
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 26
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 27
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 28
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 29
Condition: Res18CIFAR_Different_LR, Model: 4, Epoch: 30
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 0, Epoch: 0
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 0, Epoch: 1
Condition: Res18CIFAR_CUDA_nonde

Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 16
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 17
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 18
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 19
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 20
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 21
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 22
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 23
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 24
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 25
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 26
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 27
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 28
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 29
Condition: Res18CIFAR_CUDA_nondeterministic, Model: 4, Epoch: 30
Condition: Res18CIFAR_Dif

Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 16
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 17
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 18
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 19
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 20
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 21
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 22
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 23
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 24
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 25
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 26
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 27
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 28
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 29
Condition: Res18CIFAR_Different_dataorder, Model: 4, Epoch: 30
Condition: Res18CIFAR_Different_architecture, Model: 0,

Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 4
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 5
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 6
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 7
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 8
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 9
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 10
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 11
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 12
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 13
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 14
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 15
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 16
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 17
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 18
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 19
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 20
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 21
Condition: Res18CIFAR_Half_data, Model: 1, Epoch: 22

Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 14
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 15
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 16
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 17
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 18
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 19
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 20
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 21
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 22
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 23
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 24
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 25
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 26
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 27
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 28
Condition: Res18CIFAR_Combined_condition, Model: 4, Epoch: 29
Conditio

## 2. Make decision arrays

In [5]:
# Number of images kept per condition: every image of the base results, minus the
# flagged label-error images when the correction is switched on
num_images = len(base[0][0][1])
if correct:
    num_images -= len(error_inds[0])

# Pre-allocate the decision arrays
#   decisions / decisions_correct: (number of conditions, number of viable images, number of epochs)
#   decisions_change:              (number of conditions, number of epochs - 1)
num_conditions = len(conditions)
decisions = np.zeros((num_conditions, num_images, num_base_epochs))
decisions_correct = np.zeros((num_conditions, num_images, num_base_epochs))
decisions_change = np.zeros((num_conditions, num_base_epochs - 1))


def _drop_label_errors(values):
    """Return `values` without the label-error entries if `correct` is set, else `values` unchanged."""
    return np.delete(values, error_inds) if correct else values


# Collect the decisions of one model per condition
for ind, condition in enumerate(conditions):

    # Load data, conditions with more epochs than base are stored in base file
    results, val_acc, num_epochs, num_models, num_base_epochs = prep_condition(condition, folder_name, base_network)

    # Only the last model of the condition is used
    model = num_models - 1

    for epoch in range(num_epochs):

        # Print where we are
        print(f"Condition: {condition}, Model: {model}, Epoch: {epoch}")

        # Epochs beyond the base training length have no column in the arrays
        if epoch < num_base_epochs:

            # Entry 1 holds the model decisions; entry 2 is what they are compared against
            # (presumably the ground-truth labels - verify against prep_condition)
            predicted = np.array(results[model][epoch][1])
            reference = np.array(results[model][epoch][2])

            decisions[ind, :, epoch] = _drop_label_errors(predicted)
            decisions_correct[ind, :, epoch] = _drop_label_errors(np.equal(predicted, reference))

        # Count how many decisions are equal between this epoch and the next one
        # (`predicted` is always set here, since this condition implies the one above)
        if epoch < num_base_epochs - 1:
            unchanged = np.equal(predicted, np.array(results[model][epoch + 1][1]))
            decisions_change[ind, epoch] = np.sum(_drop_label_errors(unchanged))

# Average the correctness over all conditions (axis 0) for this base network
mean_decisions_correct = np.mean(decisions_correct, axis=0)

# Save arrays
for array_name, array in (("mean_decisions_correct", mean_decisions_correct),
                          ("decisions", decisions),
                          ("decisions_correct", decisions_correct),
                          ("decisions_change", decisions_change)):
    np.save(folder_name + f"{base_network}_{array_name}", array)

Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 0
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 1
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 2
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 3
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 4
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 5
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 6
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 7
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 8
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 9
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 10
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 11
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 12
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 13
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 14
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 15
Condition: Res18CIFAR_Base_condition, Model: 0, Epoch: 16
Condition: Res18CIFAR_Ba

Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 0
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 1
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 2
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 3
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 4
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 5
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 6
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 7
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 8
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 9
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 10
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 11
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 12
Condition: Res18CIFAR_Different_initialisation, Model: 4, Epoch: 13
Condition: Res18CIFAR_Different_initialisation, Model: 4, 

Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 0
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 1
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 2
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 3
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 4
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 5
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 6
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 7
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 8
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 9
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 10
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 11
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 12
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 13
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 14
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 15
Condition: Res18CIFAR_Different_data, Model: 1, Epoch: 16
Condition: Res18CIFAR_Di

## 3. Create heat array

In [6]:
# Image indices ordered by descending mean correctness in the last column
mean_order = mean_decisions_correct[:, -1].argsort()[::-1]

# Mean correctness per image at the last base epoch, and its distinct values
# (np.unique returns them sorted ascending)
final_mean_correct = mean_decisions_correct[:, num_base_epochs-1]
entries = np.unique(final_mean_correct)

# Build the array of image indices that should be removed (impossibles and trivials):
# images whose mean correctness equals one of the highest / lowest distinct values.
# rm_num selects how many distinct values are removed in total; the tuples list the
# positions in "entries" in the order in which their indices are concatenated.
rm_num = 4
_rm_entry_positions = {4: (-1, -2, 0, 1),
                       2: (-1, 0)}
if rm_num in _rm_entry_positions:
    rm_inds = np.concatenate([np.where(final_mean_correct == entries[position])[0]
                              for position in _rm_entry_positions[rm_num]])

In [7]:
# Build the heat-map arrays: mean pairwise error consistency (Cohen's kappa on the
# final-epoch correctness vectors) between all models of every pair of conditions.
#   heat_array:    kappa over all images
#   heat_array_rm: kappa after removing the images in "rm_inds" (trivials and impossibles)

# Remove different data condition from conditions list.
# The list is rebound to a filtered copy instead of popped in place, so that re-running
# this cell does not raise an IndexError once the condition is already gone.
conditions = [cond for cond in conditions if not cond.endswith("Different_data")]

# Load every condition once and keep only what the heat map needs: for each model the
# final-epoch correctness vector (entry 1 == entry 2 of the results), with and without
# the removed images. Loading inside the pair loop would reload each condition once
# per pair and recompute the same vectors for every model pair.
final_correct = {}
for cond in conditions:
    cond_results, cond_val_acc, cond_num_epochs, cond_num_models, cond_num_base_epochs = prep_condition(
        cond, folder_name, base_network)
    per_model = []
    for cond_model in range(cond_num_models):
        last_epoch = cond_results[cond_model][cond_num_epochs-1]
        full = np.equal(np.array(last_epoch[1]), np.array(last_epoch[2]))
        per_model.append((full, np.delete(full, rm_inds)))
    final_correct[cond] = per_model

# Pre-allocate array for heat map
heat_array = np.zeros((len(conditions), len(conditions)))
heat_array_rm = np.zeros((len(conditions), len(conditions)))

# Loop through all ordered pairs of conditions (different data condition already removed)
for ind_1, condition_1 in enumerate(conditions):
    models_1 = final_correct[condition_1]

    for ind_2, condition_2 in enumerate(conditions):
        models_2 = final_correct[condition_2]

        # Init lists to keep track of error consistencies
        score = []
        score_rm = []

        # A model compared with itself trivially has consistency 1, so that pair is skipped -
        # unless the condition has only a single model, in which case it is the only pair available
        only_single_models = len(models_1) == 1 and len(models_2) == 1

        # Calculate error consistency for all models of both conditions
        for model_1, (ep_1, ep_1_rm) in enumerate(models_1):
            for model_2, (ep_2, ep_2_rm) in enumerate(models_2):

                # Print information about categories and model number
                print(f"Condition 1: {condition_1}, Condition 2: {condition_2}," + 
                      f"Model 1: {model_1}, Model 2: {model_2}")

                is_self_comparison = condition_1 == condition_2 and model_1 == model_2
                if is_self_comparison and not only_single_models:
                    continue

                # Save kappa for all images
                score.append(cohen_kappa_score(ep_1, ep_2))

                # Save kappa without trivials and impossibles
                score_rm.append(cohen_kappa_score(ep_1_rm, ep_2_rm))

        # Save to arrays
        heat_array[ind_1, ind_2] = np.mean(score)
        heat_array_rm[ind_1, ind_2] = np.mean(score_rm)

# Save arrays
np.save(folder_name + f"{base_network}_heat_array", heat_array)
np.save(folder_name + f"{base_network}_heat_array_rm", heat_array_rm)

Condition 1: Res18_Base_condition, Condition 2: Res18_Base_condition,Model 1: 0, Model 2: 0
Condition 1: Res18_Base_condition, Condition 2: Res18_Plus_1ep,Model 1: 0, Model 2: 0
Condition 1: Res18_Base_condition, Condition 2: Res18_Plus_10ep,Model 1: 0, Model 2: 0
Condition 1: Res18_Base_condition, Condition 2: Res18_Different_optimizer,Model 1: 0, Model 2: 0
Condition 1: Res18_Base_condition, Condition 2: Res18_Different_batchsize,Model 1: 0, Model 2: 0
Condition 1: Res18_Base_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 0
Condition 1: Res18_Base_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 1
Condition 1: Res18_Base_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 2
Condition 1: Res18_Base_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 3
Condition 1: Res18_Base_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 4
Condition 1: Res18_Base_conditi

Condition 1: Res18_Plus_10ep, Condition 2: Res18_Different_architecture,Model 1: 0, Model 2: 0
Condition 1: Res18_Plus_10ep, Condition 2: Res18_Half_data,Model 1: 0, Model 2: 0
Condition 1: Res18_Plus_10ep, Condition 2: Res18_Half_data,Model 1: 0, Model 2: 1
Condition 1: Res18_Plus_10ep, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 0
Condition 1: Res18_Plus_10ep, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 1
Condition 1: Res18_Plus_10ep, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 2
Condition 1: Res18_Plus_10ep, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 3
Condition 1: Res18_Plus_10ep, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 4
Condition 1: Res18_Different_optimizer, Condition 2: Res18_Base_condition,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_optimizer, Condition 2: Res18_Plus_1ep,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_optimizer, Condition 2: Res18_Plus_10ep,Model 1: 0, Model 2: 0
Condit

Condition 1: Res18_Different_initialisation, Condition 2: Res18_Plus_10ep,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Plus_10ep,Model 1: 1, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Plus_10ep,Model 1: 2, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Plus_10ep,Model 1: 3, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Plus_10ep,Model 1: 4, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Different_optimizer,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Different_optimizer,Model 1: 1, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Different_optimizer,Model 1: 2, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Different_optimizer,Model 1: 3, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Different_optimizer,Mod

Condition 1: Res18_Different_initialisation, Condition 2: Res18_CUDA_nondeterministic,Model 1: 3, Model 2: 3
Condition 1: Res18_Different_initialisation, Condition 2: Res18_CUDA_nondeterministic,Model 1: 3, Model 2: 4
Condition 1: Res18_Different_initialisation, Condition 2: Res18_CUDA_nondeterministic,Model 1: 4, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_CUDA_nondeterministic,Model 1: 4, Model 2: 1
Condition 1: Res18_Different_initialisation, Condition 2: Res18_CUDA_nondeterministic,Model 1: 4, Model 2: 2
Condition 1: Res18_Different_initialisation, Condition 2: Res18_CUDA_nondeterministic,Model 1: 4, Model 2: 3
Condition 1: Res18_Different_initialisation, Condition 2: Res18_CUDA_nondeterministic,Model 1: 4, Model 2: 4
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Different_dataorder,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_initialisation, Condition 2: Res18_Different_dataorder,Model 1: 0, Model 2: 1
Condition 1: Res18_Diff

Condition 1: Res18_Different_LR, Condition 2: Res18_Plus_10ep,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Plus_10ep,Model 1: 1, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Plus_10ep,Model 1: 2, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Plus_10ep,Model 1: 3, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Plus_10ep,Model 1: 4, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_optimizer,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_optimizer,Model 1: 1, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_optimizer,Model 1: 2, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_optimizer,Model 1: 3, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_optimizer,Model 1: 4, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_batchsize,Model 1: 0, Model 2: 0
Condi

Condition 1: Res18_Different_LR, Condition 2: Res18_CUDA_nondeterministic,Model 1: 4, Model 2: 4
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 0, Model 2: 1
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 0, Model 2: 2
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 0, Model 2: 3
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 0, Model 2: 4
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 0
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 1
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 2
Condition 1: Res18_Different_LR, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 3
Condition 1: Res18_Different_LR, Condition 2: Re

Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 0
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 1
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 2
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 3
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 4
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 1, Model 2: 0
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 1, Model 2: 1
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 1, Model 2: 2
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_initialisation,Model 1: 1, Model 2: 3
Condition 1: Res18_

Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 1
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 2
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 3
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 4
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 0
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 1
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 2
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 3
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 4
Condition 1: Res18_CUDA_nondeterministic, Condition 2: Res18_Dif

Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 1
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 2
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 3
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 4
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 1, Model 2: 0
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 1, Model 2: 1
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 1, Model 2: 2
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_initialisation,Model 1: 1, Model 2: 3
Condition 1: Res18_Different_dataorde

Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 3
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 4
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 0
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 1
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 2
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 3
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 4
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 3, Model 2: 0
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Model 1: 3, Model 2: 1
Condition 1: Res18_Different_dataorder, Condition 2: Res18_Different_dataorder,Mod

Condition 1: Res18_Different_architecture, Condition 2: Res18_Different_architecture,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_architecture, Condition 2: Res18_Half_data,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_architecture, Condition 2: Res18_Half_data,Model 1: 0, Model 2: 1
Condition 1: Res18_Different_architecture, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 0
Condition 1: Res18_Different_architecture, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 1
Condition 1: Res18_Different_architecture, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 2
Condition 1: Res18_Different_architecture, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 3
Condition 1: Res18_Different_architecture, Condition 2: Res18_Combined_condition,Model 1: 0, Model 2: 4
Condition 1: Res18_Half_data, Condition 2: Res18_Base_condition,Model 1: 0, Model 2: 0
Condition 1: Res18_Half_data, Condition 2: Res18_Base_condition,Model 1: 1, Model 2: 0
Conditio

Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_batchsize,Model 1: 0, Model 2: 0
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_batchsize,Model 1: 1, Model 2: 0
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_batchsize,Model 1: 2, Model 2: 0
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_batchsize,Model 1: 3, Model 2: 0
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_batchsize,Model 1: 4, Model 2: 0
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 0
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 1
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 2
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_initialisation,Model 1: 0, Model 2: 3
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_ini

Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 1
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 2
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 3
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 1, Model 2: 4
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 0
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 1
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 2
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 3
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 2, Model 2: 4
Condition 1: Res18_Combined_condition, Condition 2: Res18_Different_dataorder,Model 1: 3, M

# Create arrays for SOTA-Models

In [6]:
# Set base network and conditions
base_network = "Rebuttal"
folder_name = f"./results/{base_network}/"
figure_path = f"./figures/{base_network}/"

# Every entry of the results folder that is not a saved .npy array is treated as one model.
# os.listdir returns entries in arbitrary order; sorting keeps the row order of the arrays
# built from `conditions` reproducible across runs and machines.
conditions = sorted(f for f in os.listdir(folder_name) if '.npy' not in f)
print(f"{len(conditions)} models found: {conditions}.")

# Set flag whether to correct for label errors
correct = False

# Set flag whether to remove impossibles and trivials for heatmap analysis
rm = False

# Load the label error flags (third column of the file) and split the image indices into
# flagged (== 1) and unflagged (== 0) ones. This is loaded regardless of the `correct` flag.
# The builtin int is used because the np.int alias was removed in NumPy 1.24.
label_error = np.load("imagenet_val_ident.npy", allow_pickle=True)[:, 2].astype(int)
error_inds = np.where(label_error == 1)
right_inds = np.where(label_error == 0)

# Pre-allocate decisions_correct, class accuracy and model accuracy arrays
# (one row per model; 50000 images and 1000 classes are hard-coded here)
decisions_correct = np.zeros((len(conditions), 50000, 1))
class_accuracies = np.zeros((len(conditions), 1000))
model_accuracies = np.zeros((len(conditions)))

11 models found: ['CLIP', 'CORnet_RT', 'bagnet33', 'hrnet_w44', 'resnet152', 'resnet50_l2_eps1', 'resnet50_swsl', 'resnet50_trained_on_SIN', 'simclr_resnet50x1', 'squeezenet1_1', 'vit_base_patch16_224'].


## 1. Decisions arrays

In [7]:
# Fill the pre-allocated arrays, one row per model
for cond_ind, condition in enumerate(conditions):

    if condition != "CLIP":

        # Regular models are read through the shared loader
        results, val_acc = load_data(folder_name + condition + "/")

        # The two arrays that are compared element-wise; the second one is used as the
        # class key below, so it presumably holds the ground-truth labels (TODO confirm)
        responses = results[0][0][0]
        targets = results[0][0][1]
        matches = np.equal(responses, targets)

        # Report the fraction of matching entries for this model
        acc = matches.float().mean()
        print(f'{condition}, Accuracy: {acc:.2f}')

        # Per-class accuracy, with classes addressed by their integer index
        num_classes = len(np.unique(targets))
        equal_answers = matches.numpy()
        for ind in range(num_classes):
            class_rows = np.where(targets == ind)[0]
            class_accuracies[cond_ind, ind] = np.mean(equal_answers[class_rows])

    else:

        # CLIP is stored as a two-column CSV of byte strings instead of the loader format;
        # wrap it in the same nested layout that the loader results are indexed with
        table = np.genfromtxt(folder_name + condition + "/NUM1/RESULTS_EP0.csv", delimiter=',', dtype="|S10")
        results = [[[table[:, 0], table[:, 1]]]]
        responses, targets = results[0][0]
        equal_answers = responses == targets

        # Report the fraction of matching entries for this model
        acc = np.mean(equal_answers)
        print(f'{condition}, Accuracy: {acc:.2f}')

        # Per-class accuracy, with classes enumerated in the sorted order of np.unique
        categories = np.unique(targets)
        num_classes = len(categories)
        for ind, category in enumerate(categories):
            class_rows = np.where(targets == category)[0]
            class_accuracies[cond_ind, ind] = np.mean(equal_answers[class_rows])

    # Store per-image correctness (for the histogram) and the overall accuracy
    decisions_correct[cond_ind, :, 0] = equal_answers
    model_accuracies[cond_ind] = acc

# Average the per-image correctness over all models of this base network
mean_decisions_correct = np.mean(decisions_correct, axis=0)

# Write all four arrays next to the results, prefixed with the base network name
for suffix, array in (("mean_decisions_correct", mean_decisions_correct),
                      ("decisions_correct", decisions_correct),
                      ("model_accuracies", model_accuracies),
                      ("class_accuracies", class_accuracies)):
    np.save(folder_name + f"{base_network}_{suffix}", array)

CLIP, Accuracy: 0.63
CORnet_RT, Accuracy: 0.55
bagnet33, Accuracy: 0.67
hrnet_w44, Accuracy: 0.79
resnet152, Accuracy: 0.78
resnet50_l2_eps1, Accuracy: 0.70
resnet50_swsl, Accuracy: 0.81
resnet50_trained_on_SIN, Accuracy: 0.60
simclr_resnet50x1, Accuracy: 0.68
squeezenet1_1, Accuracy: 0.58
vit_base_patch16_224, Accuracy: 0.78


## 2. Heat map array

In [9]:
# Sota models were only tested once, so there is a single base "epoch"
num_base_epochs = 1

# Image ordering by mean correctness in the last column, from highest to lowest
mean_order = np.flip(mean_decisions_correct[:, -1].argsort())

# Mean correctness per image at the last base epoch and its distinct values
# (np.unique returns them in ascending order)
last_epoch_means = mean_decisions_correct[:, num_base_epochs - 1]
entries = np.unique(last_epoch_means)

# Positions within `entries` whose images are collected for removal:
# 4 -> the two highest and the two lowest values, 2 -> only the highest and the lowest
rm_num = 4
entry_picks = {4: (-1, -2, 0, 1), 2: (-1, 0)}
if rm_num in entry_picks:
    rm_inds = np.concatenate([np.where(last_epoch_means == entries[pick])[0]
                              for pick in entry_picks[rm_num]])

In [10]:
def _load_correctness(condition):
    """Return a boolean array marking, per image, whether the two result arrays of `condition` agree."""
    if condition == "CLIP":
        # CLIP is stored as a two-column CSV of byte strings instead of the load_data format
        table = np.genfromtxt(folder_name + condition + "/NUM1/RESULTS_EP0.csv", delimiter=',', dtype="|S10")
        first, second = table[:, 0], table[:, 1]
    else:
        results, _ = load_data(folder_name + condition + "/")
        first, second = results[0][0][0], results[0][0][1]
    return np.array(first) == np.array(second)


# Load every model exactly once instead of re-reading its files for each pair in the
# nested loop below, and pre-compute the variant without trivials and impossibles
correct_all = [_load_correctness(condition) for condition in conditions]
correct_rm = [np.delete(correct_vec, rm_inds) for correct_vec in correct_all]

# Pre-allocate arrays for heat map (NaN marks entries that have not been computed)
heat_array = np.full((len(conditions), len(conditions)), np.nan)
heat_array_rm = np.full((len(conditions), len(conditions)), np.nan)

# Loop through all ordered pairs of models, including each model with itself
for ind_1 in range(len(conditions)):
    for ind_2 in range(len(conditions)):

        # Save kappa for all images
        heat_array[ind_1, ind_2] = cohen_kappa_score(correct_all[ind_1], correct_all[ind_2])

        # Save kappa without trivials and impossibles
        heat_array_rm[ind_1, ind_2] = cohen_kappa_score(correct_rm[ind_1], correct_rm[ind_2])

# Save arrays
np.save(folder_name + f"{base_network}_heat_array", heat_array)
np.save(folder_name + f"{base_network}_heat_array_rm", heat_array_rm)