# Reproducible code for the associated paper

## Introduction

Here is the reproducible Python code used to run the experiments in the paper and produce the presented plots. We test the performance of various anomaly detection strategies:

- LODA (Pevny 2015)
- LODA with updated linear combination weights using active learning (Das et al. 2016)
- GLAD (Islam et al. 2018)
- Our AAA method


## All necessary background loads

Importing our __acanag__ library already imports a number of other functions/libraries. A few others needed for this reproducible code are imported below.

In [None]:
from acanag import *

import seaborn as sns

from matplotlib.lines import Line2D
import matplotlib.lines as mlines

from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc
from sklearn.neighbors import KernelDensity
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor

from scipy.stats import gaussian_kde
from scipy.stats import norm

# Two-dimensional Gaussian trials

## Simulation parameters

In [None]:
# Global simulation settings shared by all the experiment cells below.

# The number of "old" (initial) data points we start with, before any batch arrives:
n_old = 1000

# The number of data points in each future batch:
B = 500

# The number of future batches:
n_loops = 200

# The mixture parameter: passed to sample_data as the fraction of anomalies. It is also
# reused below as tau_exp in ActiveAGG and as C_tau in the active-LODA trials.
tau = 0.01

# Minimum number of data points to use to calibrate the number of LODA projections, unless the
# total number of data points is less than n_min. The choice between the two happens elsewhere
# (see the min(n_min, n_old+B*n_loops) slice in the LODA_Choose_M calls), so this can be left here.
n_min = 1000

# Upper bound for the number of LODA projections (passed to LODA_Choose_M as M_max):
M_max = 15

# Parameter set to default 0.01 in Pevny (2015). However, in small-dimensional settings (e.g., d=2)
# this may not be a good idea? It is passed to LODA_Choose_M as tau_M.
tau_M = 0.1

# We also need to decide how many data points to feed to the expert in each loop IN TOTAL. In
# Das et al. (2016) they feed the top one. Here there are different algorithms, and not all of them
# have an active learning step. What we do is define the total number of items the expert can see in
# each pass = n_send. However, for instance with our method, not all of these will be sent because
# they had high scores (= possible anomaly). Some of them will be kept aside to be used in an active
# learning strategy (e.g., uncertainty sampling).
n_send = 5

# Proportion of the initial data whose true anomaly status we suppose is known
# (the remaining labels are masked with NaN before each trial):
u = 0.1

# Number of simulation trials for each setting:
n_trials = 5

In our simulations we have the nominal data as $N((0,0),I_2)$ and the anomaly data as $N((c,c),0.1*I_2)$ for four choices of $c$.


## $c=2$

### Our method

In [None]:
# Seed NumPy's global random number generator so that the np.random draws in the next cell
# (e.g. the label-masking permutation) are repeatable from run to run.
np.random.seed(123456789)

In [None]:
############################################################################
# Two-dimensional Gaussian nominals with one two-dimensional Gaussian anomaly 
############################################################################
#
# Runs n_trials independent trials of our AAA method for anomaly mean (c, c).
# In each trial we:
#   1. sample a stream of n_old initial points plus n_loops batches of size B,
#      and a separate 5000-point validation set used only to compute AUC;
#   2. plot the sampled data;
#   3. mask most of the initial labels (only a proportion u stays visible);
#   4. choose the LODA projections and score the validation set with them;
#   5. feed the batches one by one to ActiveAGG, let a simulated expert label the
#      requested points, and record the validation AUC after every batch.
# Results are accumulated in all_AAA_AUC and all_nY1_AAA (one row per trial,
# one column per batch).

c = 2

all_AAA_AUC = np.zeros((n_trials,n_loops))   # Validation AUC after each batch
all_nY1_AAA = np.zeros((n_trials, n_loops))  # Cumulative number of labeled anomalies (nY1)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Specific arguments:
    a_list = [[c,c]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    
    # Visualise the sampled stream: first as 2D density estimates, then as a scatter plot.
    X_nominal = X[Y == 0]
    X_anomalous = X[Y == 1]
    
    plt.figure(figsize=(10, 8))
    
    # Plot density of nominals (blue)
    sns.kdeplot(x=X_nominal[:, 0], y=X_nominal[:, 1], cmap="Blues", fill=True, alpha=0.6)
    
    # Plot density of anomalies (red)
    sns.kdeplot(x=X_anomalous[:, 0], y=X_anomalous[:, 1], cmap="Reds", fill=True, alpha=0.6)
    
    # Customizing the plot
    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # The legend is built by hand, using explicit proxy lines for the two filled densities
    legend_elements = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies")
    ]
    
    plt.legend(handles=legend_elements, loc='upper right')
    
    plt.grid(True)
    plt.show()
    
    
    plt.figure(figsize=(10, 8))
    
    # Scatter plot for nominals (blue)
    plt.scatter(X_nominal[:, 0], X_nominal[:, 1], color='blue', alpha=0.6, label="Nominals")
    
    # Scatter plot for anomalies (red)
    plt.scatter(X_anomalous[:, 0], X_anomalous[:, 1], color='red', alpha=0.6, label="Anomalies")
    
    # Customizing the plot
    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Legend entries come from the label= arguments of the scatter calls
    plt.legend(loc='upper right')
    
    plt.grid(True)
    plt.show()
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        # These quantities do not depend on the random permutation, so they are
        # computed once rather than on every retry of the loop below.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        #Calculate how many labels to show and how many to hide:
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100
        n_show = n_old - n_hide
        
        # The loop below redraws the mask until it leaves exactly 2 labeled anomalies or
        # exactly 98 labeled nominals (the two coincide when 100 labels are shown, as with
        # the default n_old=1000 and u=0.1). If neither target can be reached the loop
        # would never terminate, so fail loudly instead.
        # NOTE(review): this feasibility check assumes Y only contains the labels 0 and 1.
        can_reach_2_anomalies = (n_old_anomalies >= 2) and (2 <= n_show <= n_old_nominals + 2)
        can_reach_98_nominals = (n_old_nominals >= 98) and (98 <= n_show <= n_old_anomalies + 98)
        if not (can_reach_2_anomalies or can_reach_98_nominals):
            raise ValueError(
                'Cannot mask the initial data so that exactly 2 labeled anomalies or '
                'exactly 98 labeled nominals remain: there are '
                + str(n_old_anomalies) + ' anomalies and ' + str(n_old_nominals)
                + ' nominals, of which ' + str(n_show) + ' labels would be shown.'
            )
        
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the points whose labels get hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
        
        #Get the values of X corresponding to the unmuted anomalies (for the accepted mask):
        X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)


    #Calculate the unweighted scores on the massive external validation set:
    #(one column per LODA model)
    # NOTE(review): each model is re-fitted on X_AUC here before scoring, and the same model
    # objects are later handed to InitActiveAGG/ActiveAGG - confirm that this is intended.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
    
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        # Assigning to .dtype reinterprets the buffer in place (it is not a cast);
        # presumably the scores are already float64 so this is a no-op - TODO confirm.
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()

    
    #Choose a supervised method that will be applied:
    supervised_method = 'LogisticRegression'

    #Initialization

    #Dealing with edge cases:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods
    
    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)


    #Deal with indices of X: [curr_L_index, curr_R_index) is the slice of the current batch
    curr_L_index = n_old
    
    #Initialize storage of AUC scores for this trial
    AAA_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        # Number of true anomalies in this batch
        nYnew1 = Y_new.count(1)
    
        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 1,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=tau
        )
        
        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1
        
        #Test the current learned model on the external data in order to calculate the AUC.
        #When no supervised model has been learned yet, record the chance-level AUC of 0.5.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            curr_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            curr_auc = 0.5
        AAA_AUC[r] = curr_auc
        all_AAA_AUC[curr_trial,r] = curr_auc
        
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average over trials (rows), giving one value per batch (column)
avg_AAA_2 = all_AAA_AUC.mean(axis=0)
avg_nY1_AAA_2 = all_nY1_AAA.mean(axis=0)

# Store the averaged curves. Y_AUC_2_onlyAAA holds Y_AUC, i.e. the labels of the external
# validation set as left by the last trial (not an AUC value).
np.savez(
    "AUC_2_onlyAAA.npz",
    avg_AAA_2=avg_AAA_2,
    avg_nY1_AAA_2=avg_nY1_AAA_2,
    Y_AUC_2_onlyAAA=Y_AUC,
)

# Store also the most recent validation-set predictions (new_preds) for the plot below
np.savez(
    "scores_2_onlyAAA.npz",
    new_preds_2=new_preds,
)


### The other three methods

In [None]:
# Re-seed NumPy's global random number generator (same seed value as for the AAA cell) so
# that the np.random draws in the next cell are repeatable from run to run.
np.random.seed(123456789)

In [None]:
############################################################################
# Two-dimensional Gaussian nominals with one two-dimensional Gaussian anomaly 
############################################################################

c = 2

# Result arrays: one row per trial, one column per batch. The active-LODA arrays
# only have int(n_loops/2) columns, matching the number of batches its loop processes.
full_shape = (n_trials, n_loops)
half_shape = (n_trials, int(n_loops/2))

# Validation AUC per batch for each method
all_LODA_AUC = np.zeros(full_shape)
all_active_LODA_AUC = np.zeros(half_shape)
all_GLAD_AUC = np.zeros(full_shape)

# nY1 values (number of labeled anomalies) per batch for each method
all_nY1_LODA = np.zeros(full_shape)
all_nY1_active_LODA = np.zeros(half_shape)
all_nY1_GLAD = np.zeros(full_shape)


for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Specific arguments:
    a_list = [[c,c]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    
    # Your code for plotting data
    plt.figure(figsize=(10, 8))
    
    # Plot density of nominals (blue)
    sns.kdeplot(x=X[Y == 0][:, 0], y=X[Y == 0][:, 1], cmap="Blues", fill=True, alpha=0.6)
    
    # Plot density of anomalies (red)
    sns.kdeplot(x=X[Y == 1][:, 0], y=X[Y == 1][:, 1], cmap="Reds", fill=True, alpha=0.6)
    
    # Customizing the plot
    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Manually create legend
    legend_elements = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies")
    ]
    
    plt.legend(handles=legend_elements, loc='upper right')  # Manually adding legend
    
    plt.grid(True)
    plt.show()
    
    
    plt.figure(figsize=(10, 8))
    
    # Scatter plot for nominals (blue)
    plt.scatter(X[Y == 0][:, 0], X[Y == 0][:, 1], color='blue', alpha=0.6, label="Nominals")
    
    # Scatter plot for anomalies (red)
    plt.scatter(X[Y == 1][:, 0], X[Y == 1][:, 1], color='red', alpha=0.6, label="Anomalies")
    
    # Customizing the plot
    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Adding legend
    plt.legend(loc='upper right')  # Automatically adding legend with labels from scatter plots
    
    # Adding grid
    plt.grid(True)
    
    # Showing the plot
    plt.show()
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of permute_indices are the data points whose labels get masked:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
      
    for i, (name, model) in enumerate(models.items()):
    
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS

    print('LODA trial begins')
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
       
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,best_m))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Sort these scores:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################
        
        #############################################################
        LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_scores)
        all_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
    #####################################################################################################################
    # ACTIVE-LODA trials:

    print('active-LODA trial begins')
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send

    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.array([1/best_m for i in range(best_m)])
    #print(np.shape(w_old))
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    
    #If there were initial data and at least one labeled nominal (following Das et al. (2016))
    if n_old > 0 and np.sum(Y_muted == 0) > 0:
            
        #Extract the initial data from X:
        X_new = X[:n_old,:]
    
        #Calculate the unweighted scores for each LODA projector on the initial data: 
        new_unweighted_scores = np.empty((n_old,best_m))
    
        for i, (name, model) in enumerate(models.items()):
    
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
    
        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)
    
        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]
    
        #Calculate q_tau on this initial data: 
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first loop,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal. 
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]
    
        #Create the arrays H_A and H_N:
        #First we take H_A. In Das et al. (2016) they allow for the case that there
        #are no labeled anomalies, only labeled nominals.
        if np.sum(Y_muted == 1) == 0:
            #Set a "fake anomaly alert to 1":
            fake_anomaly = 1
            #We take the data point corresponding to the score sorted into the 
            #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            H_N = new_unweighted_scores[(Y_muted==0),:]
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
                    
        else:
            #There is at least one labeled anomaly:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            H_N = new_unweighted_scores[(Y_muted==0),:]
     
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            
            temp_unweighted = np.concatenate([H_A,H_N])
            temp_pred = np.matmul(temp_unweighted,w_new)
            temp_YA = [1]*np.shape(H_A)[0]
            temp_YN = [0]*np.shape(H_N)[0]
            temp_Y = temp_YA + temp_YN
            
        #We can now update w_old with the value of w_new. If this whole big loop wasn't run,
        #then w_old will stay at its original value.
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
            
    #Else if there were initial data but no labeled nominals: Das et al. (2016)
    #provide no details for what to do here. It is possible though unlikely that
    #in the initial data, there were no labeled nominals but there was at least
    #one labeled anomaly. We do have to check this, since we will need to initialize
    #H_A in this case, even if H_N is empty.
    elif n_old > 0 and np.sum(Y_muted == 0) == 0:
        if np.sum(Y_muted == 1) > 0:
            #initialize H_A:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            #Potential Python issue whereby if there is just one anomaly, weird
            #things happen with array shapes. To guard against this:
            if np.sum(Y_muted == 1) == 1:
                if np.shape(H_A) != (1,best_m):
                    H_A = H_A.reshape(1,-1)
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = sum(Y_new==1)
        
        #Update for the next loop:
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,best_m))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Temporary final active-LODA scores are a linear combination over anomaly detectors:
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        #Calculated the weighted scores on the external validation set with the current
        #value of w_old:
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
        
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        temp_Y = Y_new.tolist() + [1]*np.shape(H_A)[0] + [0]*np.shape(H_N)[0]

        #Following the methodology in Das et al. (2016), we should provide the 
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we shall instead provide the top_k scoring data
        #points to the expert, where top_k has been pre-defined.
        
        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #Remember that these may include items from the previous loop or from the initialization.
        #However, since we appended those on to the end, we know their indices will be B, B+1,...
        #so we will be able to look out for them.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)
        
        #We now have to go through the sorted_indices from the end back towards the 
        #beginning until we manage to gather top_k indices which are less than or equal to B-1.
        top_k_indices = []
        n_indices_so_far = 0
        curr_index = len(sorted_indices)
        # Walk sorted_indices from its high end downwards (curr_index is decremented
        # before each lookup) and keep only indices smaller than B, until top_k of
        # them have been collected.
        # NOTE(review): the `< B` filter suggests sorted_indices can also contain
        # positions outside the current batch — confirm against the code above.
        # NOTE(review): nothing here stops curr_index from running past the start of
        # sorted_indices if fewer than top_k indices are < B — confirm this cannot happen.
        while n_indices_so_far < top_k:
            curr_index = curr_index - 1
            next_top_index = sorted_indices[curr_index]
            if next_top_index < B:
                top_k_indices.append(next_top_index)
                n_indices_so_far = n_indices_so_far + 1
                
        #Now we go and get the labels in Y_new associated with these indices:
        #top_k_indices = np.array(top_k_indices)
        Y_expert = Y_new[top_k_indices]

        # The "expert" reveals the true labels of the selected points; they are
        # appended to the running list of labels, and the cumulative number of
        # labeled anomalies is stored for this trial and batch.
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1

        nY0 = Y_lab.count(0)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        
        #Now is a good time to check whether we just found at least one real anomaly while
        #up to now we only had one fake anomaly:
        if sum(Y_expert==1) > 0 and fake_anomaly==1:
            #We now reset H_A to be empty:
            H_A = np.empty((0, best_m))
            #And we set fake_anomaly to 0 forever:
            fake_anomaly = 0
            
        # Still no real anomaly in this batch: drop the previous placeholder row so
        # that a fresh one can be chosen further down.
        if sum(Y_expert==1) == 0 and fake_anomaly==1:
            #We reset H_A back to empty again:
            H_A = np.empty((0, best_m))
                
        #We then need to append the relevant unweighted scores to the current H_A and H_N
        # (one row of new_unweighted_scores per selected point: labeled anomalies go
        # to H_A, everything else to H_N).
        for j in range(top_k):
            if Y_expert[j]==1:
                H_A = np.concatenate([H_A,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
            else:
                H_N = np.concatenate([H_N,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
                
        #Here we have to check whether this was the first time H_A actually contained
        #a real anomaly or not, rather than a temporary one (if there were no labeled anomalies)    
        
        #Now since top_k > 0 we know that there is a positive number of labeled data for sure.
        #We always have to retain however the possibility that all of the labeled data so far
        #are nominals. 
        # NOTE(review): both branches below call optimize_w_2 with the same arguments;
        # the only difference is that the first branch installs a placeholder row in H_A.
        # NOTE(review): fake_anomaly is not set to 1 in these lines when the placeholder
        # is installed — confirm it is set elsewhere, otherwise the placeholder would
        # never be cleared by the checks above.
        if np.shape(H_A)[0]==0:
            #We take the data point corresponding to the score sorted into the 
            #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
        else:
            w_new = optimize_w_2(H_A, H_N, q_tau)
    
        #Update q_tau on this data:
        # (done after the optimization, so optimize_w_2 above used the previous q_tau)
        q_tau = sorted_scores[my_quantile_sorted_index]
        #print('q tau:',q_tau)
        
        #update w_old
        # The new weights are rescaled to unit Euclidean norm before being kept.
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        # Weighted scores and 0/1 labels of all rows currently in H_A and H_N.
        # NOTE(review): the temp_* values are not used again within the visible lines.
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN

    #####################################################################################################################
    # GLAD trials:
    #
    # This section builds and pre-trains the GLAD weighting network, then prepares
    # the labeled set (X_lab, Y_lab, all_labeled_scores) and the per-detector score
    # matrix of the "old" data (all_unweighted_scores) used by the batch loop below.

    print('GLAD trial begins')

    #Initialize some parameters:
    b = 0.5             # value every network output is pre-trained towards
    mylambda = 1        # forwarded unchanged to the custom loss functions
    top_k = n_send      # number of highest-scored points labeled per batch
    q_tau_tm1 = -10e7   # sentinel: "no score quantile has been computed yet"

    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)

    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])

    # Train the model
    # We use the same n_min used earlier to find the number of LODA projections,
    # capped at the number of rows actually available so that inputs and targets
    # always have the same length (X[:n_min] silently shortens, np.full does not).
    n_pretrain = min(n_min, np.shape(X)[0])
    y_true = np.full((n_pretrain, best_m), b)
    model_GLAD.fit(X[:n_pretrain,:], y_true, epochs=10, batch_size=32,verbose=0)

    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2]
    output = model_GLAD.predict(row)

    #INITIALIZATION
    #Dealing with edge cases:
    if n_old == 0:
        # No old data at all: everything starts empty.
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])

    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        # Positions of the old points whose label was not masked (NaN = masked).
        which_lab = [i for i, y in enumerate(Y_old) if not np.isnan(y)]

        # Score the old data with every detector whether or not any of it is
        # labeled: the batch loop indexes all_unweighted_scores by row of
        # X[:n_old + (r+1)*B], so the n_old leading rows must always be present.
        all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_old)
            # Cast (rather than assign to .dtype, which reinterprets raw bytes).
            y_score = np.asarray(model.score_samples(X_old), dtype=np.float64)
            all_unweighted_scores[:,i] = y_score.squeeze()

        if len(which_lab) == 0:
            # Old data exist but none of them is labeled.
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = []
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    # For every batch: score it with all detectors, weight the scores with the
    # current network, send the top_k highest-scored points to the "expert",
    # record the validation AUC, then retrain the network on the labeled data.

    #Deal with indices of X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):
        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)

        # Calculate all of the scores for X_new (one column per detector; each
        # detector is refitted on the batch before scoring it):
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            # Cast (rather than assign to .dtype, which reinterprets raw bytes).
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            all_scores[:,i] = y_score.squeeze()

        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])

        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score of a point = dot product of its detector scores and its weights.
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)

        # Sort these scores (ascending), so the top_k largest sit at the end:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]

        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])

        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])

        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels

        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        n_so_far = np.shape(X_so_far)[0]

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1

        # Validation scores: same row-wise dot product as for the batch above,
        # vectorised instead of looping over the rows in Python.
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores * curr_w_ext, axis=1)

        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = GLAD_AUC[r]

        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        #Check if this is the first time through (q_tau_tm1 still holds its sentinel):
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1: score everything seen so far with the
            #not-yet-retrained network.
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.sum(all_unweighted_scores[:n_so_far,:] * all_weights_so_far, axis=1)

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]

        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 extra copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly, matching the rows added above
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])

        # Recompile with a loss that ignores Keras' (y_true, y_pred) arguments and
        # instead evaluates new_custom_loss_2 on the arrays captured here.
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        # Train the model for further iterations (10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)

        #Dealing with q_tau_tm1: rescore everything seen so far with the retrained network
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.sum(all_unweighted_scores[:n_so_far,:] * all_weights_so_far, axis=1)

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]

        #Update indices:
        curr_L_index = curr_L_index + B

# Average each (trial x batch) matrix over the trials, giving one value per batch.
avg_LODA_2, avg_active_LODA_2, avg_GLAD_2 = (
    np.mean(per_trial_auc, axis=0)
    for per_trial_auc in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

avg_nY1_LODA_2, avg_nY1_active_LODA_2, avg_nY1_GLAD_2 = (
    np.mean(per_trial_count, axis=0)
    for per_trial_count in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)

# Write the averaged curves to disk; the plotting cells read them back by key.
auc_summary_2 = {
    "avg_LODA_2": avg_LODA_2,
    "avg_active_LODA_2": avg_active_LODA_2,
    "avg_GLAD_2": avg_GLAD_2,
    "avg_nY1_LODA_2": avg_nY1_LODA_2,
    "avg_nY1_active_LODA_2": avg_nY1_active_LODA_2,
    "avg_nY1_GLAD_2": avg_nY1_GLAD_2,
}
np.savez("AUC_2.npz", **auc_summary_2)

# Also keep the validation-set score vectors (as left by the last trial) for the
# score-density figure.
score_summary_2 = {
    "weighted_scores_2": weighted_scores,
    "weighted_validation_scores_2": weighted_validation_scores,
    "X_new_final_scores_ext_2": X_new_final_scores_ext,
}
np.savez("scores_2.npz", **score_summary_2)

### Load the AUC and anomaly count data, and plot them

In [None]:
# Averaged curves and validation labels stored by the AAA run
data_2_onlyAAA = np.load("AUC_2_onlyAAA.npz")
avg_AAA_2, avg_nY1_AAA_2, Y_AUC_2 = (
    data_2_onlyAAA[key]
    for key in ("avg_AAA_2", "avg_nY1_AAA_2", "Y_AUC_2_onlyAAA")
)

# Averaged curves stored by the run of the three other methods
data_2 = np.load("AUC_2.npz")
avg_LODA_2, avg_active_LODA_2, avg_GLAD_2 = (
    data_2[key]
    for key in ("avg_LODA_2", "avg_active_LODA_2", "avg_GLAD_2")
)
avg_nY1_LODA_2, avg_nY1_active_LODA_2, avg_nY1_GLAD_2 = (
    data_2[key]
    for key in ("avg_nY1_LODA_2", "avg_nY1_active_LODA_2", "avg_nY1_GLAD_2")
)


### Plot the AUCs

In [None]:
# Batch numbers on the x-axis; the Active-LODA curve only covers the first
# int(n_loops/2) batches, so it gets its own, shorter axis.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops/2) + 1))

plt.figure(figsize=(10, 6))

# One (x values, averaged AUC, legend label, marker) entry per method
auc_curves = [
    (xx, avg_LODA_2, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_2, "Active-LODA", 's'),
    (xx, avg_GLAD_2, "GLAD", '^'),
    (xx, avg_AAA_2, "AAA", 'd'),
]
for batch_numbers, avg_auc, method_label, marker_style in auc_curves:
    plt.plot(batch_numbers, avg_auc, label=method_label, marker=marker_style)

# Axis labels, one tick per batch, legend and grid
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

### Plot the cumulative number of anomalies detected

In [None]:
# Batch numbers on the x-axis; Active-LODA only covers the first int(n_loops/2) batches.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops/2) + 1))

# One (x values, averaged cumulative count, legend label, marker) entry per method
anomaly_count_curves = [
    (xx, avg_nY1_LODA_2, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_2, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_2, "GLAD", '^'),
    (xx, avg_nY1_AAA_2, "AAA", 'd'),
]
for batch_numbers, avg_count, method_label, marker_style in anomaly_count_curves:
    plt.plot(batch_numbers, avg_count, label=method_label, marker=marker_style)

# Axis labels, title, legend and a light dashed grid
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

### Plot the score densities for each method after the final batch

Here we want to see the effect of each method on the score density after running through all of the batches.  

In [None]:
# Validation-set scores stored by the AAA run
scores_2_onlyAAA = np.load("scores_2_onlyAAA.npz")
new_preds_2 = scores_2_onlyAAA["new_preds_2"]

# Validation-set scores stored by the run of the three other methods
scores_2 = np.load("scores_2.npz")
weighted_scores_2, weighted_validation_scores_2, X_new_final_scores_ext_2 = (
    scores_2[key]
    for key in ("weighted_scores_2", "weighted_validation_scores_2", "X_new_final_scores_ext_2")
)

In [None]:
# One score vector per method, in the order the four panels are drawn
score_arrays = [weighted_scores_2, weighted_validation_scores_2, X_new_final_scores_ext_2, new_preds_2]
titles = ["LODA", "Active-LODA", "GLAD", "AAA"]

# Hand-picked upper y-limit of each panel, in the same order as `titles`
panel_y_max = [1.55, 0.48, 0.17, 16.5]

# Font sizes
title_fontsize = 16
label_fontsize = 13
legend_fontsize = 14
tick_fontsize = 12

# 2x2 grid of panels, flattened so it can be walked in step with the lists above
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
axes = axes.flatten()

# The same label vector splits every method's scores into nominal / anomaly.
# NOTE(review): Y_AUC_2 is read from the AAA results file while three of the four
# score vectors come from the other run — confirm both runs used the same
# validation labels.
is_nominal = (Y_AUC_2 == 0)
is_anomaly = (Y_AUC_2 == 1)

for ax, scores, method_title, y_max in zip(axes, score_arrays, titles, panel_y_max):
    # Evaluate the densities on 200 points spanning this method's score range
    x_vals = np.linspace(np.min(scores), np.max(scores), 200)

    nominal_scores = scores[is_nominal]
    anomaly_scores = scores[is_anomaly]

    # Each KDE is scaled by its mixture proportion; a class is skipped when it
    # has fewer than two scores.
    if len(nominal_scores) > 1:
        nominal_density = gaussian_kde(nominal_scores)(x_vals) * (1 - tau)
        ax.plot(x_vals, nominal_density, label="Nominal (Y=0)", color="blue")

    if len(anomaly_scores) > 1:
        # The anomaly curve is additionally multiplied by 10.
        anomaly_density = 10 * gaussian_kde(anomaly_scores)(x_vals) * tau
        ax.plot(x_vals, anomaly_density, label="Anomaly (Y=1)", color="red")

    # Titles, axis labels, legend, grid and tick size
    ax.set_title(f"{method_title} (c=2)", fontsize=title_fontsize)
    ax.set_xlabel("Score", fontsize=label_fontsize)
    ax.set_ylabel("Aggregated score 'density'", fontsize=label_fontsize)
    ax.legend(fontsize=legend_fontsize)
    ax.grid(True)
    ax.tick_params(axis='both', labelsize=tick_fontsize)

    ax.set_ylim(0, y_max)

# Tidy the layout, display the figure, then write it to a PDF
plt.tight_layout()
plt.show()

fig.savefig("ScoreDensities_2.pdf", format="pdf", bbox_inches="tight")

## $c = 1.5$

### Our method

In [None]:
# Seed NumPy's global random generator so that the draws made through np.random
# in the next cell (e.g. np.random.permutation) are repeatable.
# NOTE(review): this seeds NumPy only — confirm whether any other source of
# randomness used by the library needs its own seed.
np.random.seed(123456789)

In [None]:
############################################################################
# Two-dimensional Gaussian nominals with one two-dimensional Gaussian anomaly 
############################################################################

#Choose 0.5, 1, 1.5 or 2 for the centre of the anomaly distribution
# (c is used for both coordinates of the anomaly mean, see a_list = [[c,c]] below)
c = 1.5

# Validation AUC of the AAA method: one row per trial, one column per batch.
all_AAA_AUC = np.zeros((n_trials,n_loops))

# Cumulative number of labeled anomalies (nY1), same layout as all_AAA_AUC.
all_nY1_AAA = np.zeros((n_trials, n_loops))  # Array to store nY1 values

# Each trial draws fresh data and reruns the whole procedure.
for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Parameters of the two-component mixture: a single anomaly Gaussian centred
    # at (c, c) and a nominal Gaussian centred at the origin.
    a_list = [[c,c]]
    anomaly_cov_list = [[[0.1,0],[0,0.1]]]
    nominal_mean = np.array([0,0])
    nominal_cov = np.array([[1,0],[0,1]])
    L = len(nominal_mean)    # dimensionality of the data

    # Everything both sampling calls have in common
    mixture_kwargs = dict(
        tau=tau,                             # fraction of anomalies
        a_list=a_list,                       # anomaly means
        anomaly_cov_list=anomaly_cov_list,   # anomaly covariance matrices
        nominal_mean=nominal_mean,           # nominal mean
        nominal_cov=nominal_cov,             # nominal covariance matrix
        L=L,                                 # dimensionality
    )

    # Experiment data: n_old initial points followed by n_loops batches of size B
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        **mixture_kwargs
    )

    # External data set of 5000 points from the same mixture, used only to compute the AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=5000,
        B=0,
        n_loops=0,
        **mixture_kwargs
    )
    
    
    # Split the sampled points by true label once; both figures use the split.
    nominal_points = X[Y == 0]
    anomaly_points = X[Y == 1]

    # Figure 1: filled 2D kernel-density estimates, nominals in blue, anomalies in red
    plt.figure(figsize=(10, 8))
    sns.kdeplot(x=nominal_points[:, 0], y=nominal_points[:, 1], cmap="Blues", fill=True, alpha=0.6)
    sns.kdeplot(x=anomaly_points[:, 0], y=anomaly_points[:, 1], cmap="Reds", fill=True, alpha=0.6)

    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # The legend entries are built by hand, as coloured line handles
    legend_elements = [
        Line2D([0], [0], color=line_colour, lw=2, label=line_label)
        for line_colour, line_label in (('blue', "Nominals"), ('red', "Anomalies"))
    ]
    plt.legend(handles=legend_elements, loc='upper right')

    plt.grid(True)
    plt.show()

    # Figure 2: scatter plot of the same points
    plt.figure(figsize=(10, 8))
    plt.scatter(nominal_points[:, 0], nominal_points[:, 1], color='blue', alpha=0.6, label="Nominals")
    plt.scatter(anomaly_points[:, 0], anomaly_points[:, 1], color='red', alpha=0.6, label="Anomalies")

    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # Here the legend is taken from the labels given to the scatter calls
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()
    
    
    # Hide the labels of a random subset of the initial ("old") data, if there is any.
    if n_old == 0:
        print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)
    elif n_old > 0:
        # Quantities that do not change between masking attempts
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # Number of labels to hide; if u > 0 the ceiling leaves at least one label visible
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        # Start values that force at least one masking attempt
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the mask while neither target count (2 anomalies, 98 nominals) is
        # reached; the loop stops as soon as either one is.
        while n_remaining_anomalies != 2 and n_remaining_nominals != 98:
            # The first n_hide entries of a random permutation are the points to mask
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            # A masked label is represented by NaN
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            # Rows of X whose anomaly label is still visible
            X_with_anomalies = [X[i,:] for i in np.flatnonzero(Y_muted == 1)]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X (or all of them, if fewer) to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)

    #Calculate the unweighted scores on the massive external validation set
    #(one column per detector; each detector is refitted on X_AUC before scoring it):
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
        model.fit(X_AUC)
        # Convert by value: assigning to `.dtype` would reinterpret the raw bytes
        # instead of casting, and NumPy discourages it.
        y_score = np.asarray(model.score_samples(X_AUC), dtype=np.float64)
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    

    #####################################################################################################################
    # Our trials: 
    #
    # Initialise the labeled set from the old data, then for every batch let
    # ActiveAGG choose the points to label, add their true labels, and record
    # the validation AUC of the model it learned.

    #Choose a supervised method that will be applied:
    supervised_method = 'LogisticRegression'

    #Initialization

    #Dealing with edge cases:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods

    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)

    #Deal with indices of X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)

        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(X_new = X_new, X_old = X_old, X_lab = X_lab, Y_lab = Y_lab, all_labeled_scores = all_labeled_scores, models=models,supervised_method = supervised_method,n_data_min = 100,n_data_max = B, min_n_labeled = 5,n_send=n_send,pc_top = 0.6,min_n_nom=5,min_n_anom=1,tau_exp=tau)

        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1

        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model was returned for this batch, record the chance-level value 0.5.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            AAA_AUC[r] = roc_auc_score(Y_AUC,new_preds)
        else:
            AAA_AUC[r] = 0.5
        # The per-trial matrix stores the same value (computed only once).
        all_AAA_AUC[curr_trial,r] = AAA_AUC[r]

        #Update indices:
        curr_L_index = curr_L_index + B

# Calculate column averages for each array (mean over trials, one value per batch)
avg_AAA_1p5 = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_1p5 = np.mean(all_nY1_AAA, axis=0)


#Save these results.
#The validation labels of the last trial are stored as well, mirroring the
#"Y_AUC_2_onlyAAA" entry that the c = 2 plotting cell reads from AUC_2_onlyAAA.npz;
#the existing keys are unchanged.
np.savez("AUC_1p5_onlyAAA.npz",
         avg_AAA_1p5 = avg_AAA_1p5,
         avg_nY1_AAA_1p5 = avg_nY1_AAA_1p5,
         Y_AUC_1p5_onlyAAA = Y_AUC,
        )


# Save also the score matrices for later (for the plot below)
# NOTE(review): new_preds only exists if a model was learned in at least one
# batch, and holds the predictions from the most recent such batch.
np.savez("scores_1p5_onlyAAA.npz",
         new_preds_1p5 = new_preds
        )

### The other three methods

In [None]:
# Seed NumPy's global random generator with the same value as the AAA cell, so
# that the draws made through np.random in the next cell are repeatable.
# NOTE(review): this seeds NumPy only — confirm whether any other source of
# randomness (e.g. the neural network used for GLAD) needs its own seed.
np.random.seed(123456789)

In [None]:
############################################################################
# Two-dimensional Gaussian nominals with one two-dimensional Gaussian anomaly 
############################################################################

#Choose 0.5, 1, 1.5 or 2 for the centre of the anomaly distribution
# (c is used for both coordinates of the anomaly mean, see a_list = [[c,c]] below)
c = 1.5

# Validation AUC per method: one row per trial, one column per batch.
# The Active-LODA arrays only have int(n_loops/2) columns.
all_LODA_AUC = np.zeros((n_trials,n_loops))
all_active_LODA_AUC = np.zeros((n_trials,int(n_loops/2)))
all_GLAD_AUC = np.zeros((n_trials,n_loops))

# Cumulative number of labeled anomalies (nY1) per method, same layout as above.
all_nY1_LODA = np.zeros((n_trials, n_loops))  # Array to store nY1 values
all_nY1_active_LODA = np.zeros((n_trials, int(n_loops/2)))  # Array to store nY1 values
all_nY1_GLAD = np.zeros((n_trials, n_loops))  # Array to store nY1 values

# Each trial draws fresh data and reruns all three methods on it.
for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Parameters of the two-component mixture: a single anomaly Gaussian centred
    # at (c, c) and a nominal Gaussian centred at the origin.
    a_list = [[c,c]]
    anomaly_cov_list = [[[0.1,0],[0,0.1]]]
    nominal_mean = np.array([0,0])
    nominal_cov = np.array([[1,0],[0,1]])
    L = len(nominal_mean)    # dimensionality of the data

    # Everything both sampling calls have in common
    mixture_kwargs = dict(
        tau=tau,                             # fraction of anomalies
        a_list=a_list,                       # anomaly means
        anomaly_cov_list=anomaly_cov_list,   # anomaly covariance matrices
        nominal_mean=nominal_mean,           # nominal mean
        nominal_cov=nominal_cov,             # nominal covariance matrix
        L=L,                                 # dimensionality
    )

    # Experiment data: n_old initial points followed by n_loops batches of size B
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        **mixture_kwargs
    )

    # External data set of 5000 points from the same mixture, used only to compute the AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=5000,
        B=0,
        n_loops=0,
        **mixture_kwargs
    )
    
    
    # Split the sampled points by true label once; both figures use the split.
    nominal_points = X[Y == 0]
    anomaly_points = X[Y == 1]

    # Figure 1: filled 2D kernel-density estimates, nominals in blue, anomalies in red
    plt.figure(figsize=(10, 8))
    sns.kdeplot(x=nominal_points[:, 0], y=nominal_points[:, 1], cmap="Blues", fill=True, alpha=0.6)
    sns.kdeplot(x=anomaly_points[:, 0], y=anomaly_points[:, 1], cmap="Reds", fill=True, alpha=0.6)

    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # The legend entries are built by hand, as coloured line handles
    legend_elements = [
        Line2D([0], [0], color=line_colour, lw=2, label=line_label)
        for line_colour, line_label in (('blue', "Nominals"), ('red', "Anomalies"))
    ]
    plt.legend(handles=legend_elements, loc='upper right')

    plt.grid(True)
    plt.show()

    # Figure 2: scatter plot of the same points
    plt.figure(figsize=(10, 8))
    plt.scatter(nominal_points[:, 0], nominal_points[:, 1], color='blue', alpha=0.6, label="Nominals")
    plt.scatter(anomaly_points[:, 0], anomaly_points[:, 1], color='red', alpha=0.6, label="Anomalies")

    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # Here the legend is taken from the labels given to the scatter calls
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()
    
    
    # Hide the labels of a random subset of the initial ("old") data, if there is any.
    if n_old == 0:
        print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)
    elif n_old > 0:
        # Quantities that do not change between masking attempts
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # Number of labels to hide; if u > 0 the ceiling leaves at least one label visible
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        # Start values that force at least one masking attempt
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the mask while neither target count (2 anomalies, 98 nominals) is
        # reached; the loop stops as soon as either one is.
        while n_remaining_anomalies != 2 and n_remaining_nominals != 98:
            # The first n_hide entries of a random permutation are the points to mask
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            # A masked label is represented by NaN
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            # Rows of X whose anomaly label is still visible
            X_with_anomalies = [X[i,:] for i in np.flatnonzero(Y_muted == 1)]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X (or all of them, if fewer) to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)

    #Calculate the unweighted scores on the massive external validation set
    #(one column per detector; each detector is refitted on X_AUC before scoring it):
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
        model.fit(X_AUC)
        # Convert by value: assigning to `.dtype` would reinterpret the raw bytes
        # instead of casting, and NumPy discourages it.
        y_score = np.asarray(model.score_samples(X_AUC), dtype=np.float64)
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS
    #
    # Plain LODA: the final score is the unweighted mean over the detectors. The
    # top_k highest-scored points of every batch are labeled, but the labels are
    # only counted, never used to change the scores.

    curr_L_index = n_old
    LODA_AUC = [0]*n_loops

    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    # weighted_scores is not modified inside the batch loop, so the validation
    # AUC and ROC curve are the same for every batch: compute them once here.
    loda_validation_auc = roc_auc_score(Y_AUC,weighted_scores)
    fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
    # Calculate AUC
    roc_auc = auc(fpr, tpr)

    # Labels of the old data that were left visible (NaN = masked)
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        curr_L_index = curr_L_index + B

        # Per-detector scores of the batch (each detector is refitted on the batch first)
        new_unweighted_scores = np.empty((B,best_m))

        for i, (name, model) in enumerate(models.items()):

            model.fit(X_new)
            # Cast (rather than assign to .dtype, which reinterprets raw bytes).
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Sort these scores (ascending) and label the top_k largest:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################

        #############################################################
        LODA_AUC[r] = loda_validation_auc
        all_LODA_AUC[curr_trial,r] = loda_validation_auc

    #####################################################################################################################
    # ACTIVE-LODA trials:
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau
    
    #There are hyperparameters that need to be set in advance for this algorithm. However,
    #for simplicity we assume they take the default values in the function optimize_w.
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send
    
    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.array([1/best_m for i in range(best_m)])
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    
    #If there were initial data and at least one labeled nominal (following Das et al. (2016))
    if n_old > 0 and np.sum(Y_muted == 0) > 0:
            
        #Extract the initial data from X:
        X_new = X[:n_old,:]
    
        #Calculate the unweighted scores for each LODA projector on the initial data: 
        new_unweighted_scores = np.empty((n_old,best_m))
    
        for i, (name, model) in enumerate(models.items()):
    
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
    
        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)
    
        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]
    
        #Calculate q_tau on this initial data: 
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first loop,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal. 
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]
    
        #Create the arrays H_A and H_N:
        #First we take H_A. In Das et al. (2016) they allow for the case that there
        #are no labeled anomalies, only labeled nominals.
        if np.sum(Y_muted == 1) == 0:
            #Set a "fake anomaly alert to 1":
            fake_anomaly = 1
            #We take the data point corresponding to the score sorted into the 
            #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            H_N = new_unweighted_scores[(Y_muted==0),:]
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
                    
        else:
            #There is at least one labeled anomaly:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            H_N = new_unweighted_scores[(Y_muted==0),:]
     
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            
            temp_unweighted = np.concatenate([H_A,H_N])
            temp_pred = np.matmul(temp_unweighted,w_new)
            temp_YA = [1]*np.shape(H_A)[0]
            temp_YN = [0]*np.shape(H_N)[0]
            temp_Y = temp_YA + temp_YN
            
        #We can now update w_old with the value of w_new. If this whole big loop wasn't run,
        #then w_old will stay at its original value.
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
    
    #Else if there were initial data but no labeled nominals: Das et al. (2016)
    #provide no details for what to do here. It is possible though unlikely that
    #in the initial data, there were no labeled nominals but there was at least
    #one labeled anomaly. We do have to check this, since we will need to initialize
    #H_A in this case, even if H_N is empty.
    elif n_old > 0 and np.sum(Y_muted == 0) == 0:
        if np.sum(Y_muted == 1) > 0:
            #initialize H_A:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            #Potential Python issue whereby if there is just one anomaly, weird
            #things happen with array shapes. To guard against this:
            if np.sum(Y_muted == 1) == 1:
                if np.shape(H_A) != (1,best_m):
                    H_A = H_A.reshape(1,-1)
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = sum(Y_new==1)
        
        #Update for the next loop:
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,best_m))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Temporary final active-LODA scores are a linear combination over anomaly detectors:
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        #Calculated the weighted scores on the external validation set with the current
        #value of w_old:
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        #We actually have to, at this point, attach the current versions of H_A
        #and H_N to new_unweighted_scores, since in this batch framework, we do
        #not have a fixed number of data points from the start to the finish, like
        #they do in Das et al. (2016). If we do not do this, it will affect the
        #calculation of q_tau over time (a kind of bias will be introduced, maybe
        #not the end of the world, but still.)
        
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        temp_Y = Y_new.tolist() + [1]*np.shape(H_A)[0] + [0]*np.shape(H_N)[0]

        #Following the methodology in Das et al. (2016), we should provide the 
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we shall instead provide the top_k scoring data
        #points to the expert, where top_k has been pre-defined.
        
        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #Remember that these may include items from the previous loop or from the initialization.
        #However, since we appended those on to the end, we know their indices will be B, B+1,...
        #so we will be able to look out for them.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]
    
        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)
        
        #We now have to go through the sorted_indices from the end back towards the 
        #beginning until we manage to gather top_k indices which are less than or equal to B-1.
        top_k_indices = []
        n_indices_so_far = 0
        curr_index = len(sorted_indices)
        while n_indices_so_far < top_k:
            curr_index = curr_index - 1
            next_top_index = sorted_indices[curr_index]
            if next_top_index < B:
                top_k_indices.append(next_top_index)
                n_indices_so_far = n_indices_so_far + 1
                
        #Now we go and get the labels in Y_new associated with these indices:
        #top_k_indices = np.array(top_k_indices)
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1
        
        #Now is a good time to check whether we just found at least one real anomaly while
        #up to now we only had one fake anomaly:
        if sum(Y_expert==1) > 0 and fake_anomaly==1:
            #We now reset H_A to be empty:
            H_A = np.empty((0, best_m))
            #And we set fake_anomaly to 0 forever:
            fake_anomaly = 0
            
        if sum(Y_expert==1) == 0 and fake_anomaly==1:
            #We reset H_A back to empty again:
            H_A = np.empty((0, best_m))
                
        #We then need to append the relevant unweighted scores to the current H_A and H_N
        for j in range(top_k):
            if Y_expert[j]==1:
                H_A = np.concatenate([H_A,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
            else:
                H_N = np.concatenate([H_N,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
                
        #Here we have to check whether this was the first time H_A actually contained
        #a real anomaly or not, rather than a temporary one (if there were no labeled anomalies)    
        
        #Now since top_k > 0 we know that there is a positive number of labeled data for sure.
        #We always have to retain however the possibility that all of the labeled data so far
        #are nominals. 
        if np.shape(H_A)[0]==0:
            #We take the data point whose score sits at sorted position
            #my_quantile_sorted_index (computed above from all current scores, not
            #just the n_old initial ones) as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
        else:
            w_new = optimize_w_2(H_A, H_N, q_tau)
    
                
    
        #Update q_tau on this data:
        q_tau = sorted_scores[my_quantile_sorted_index]
        
        #update w_old
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN
    
    #####################################################################################################################
    # GLAD trials:
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    q_tau_tm1 = -10e7
    
    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    # Train the model
    # We use the same n_min used earlier to find the number of LODA projections:
    y_true = np.full((n_min, best_m), b)
    model_GLAD.fit(X[:n_min,:], y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Dealing with edge cases:
    if n_old == 0:
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])
    
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]
    
        if len(which_lab) == 0:
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = [] 
            all_unweighted_scores = np.empty([0,best_m])
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_old)
                y_score = model.score_samples(X_old)
                y_score.dtype = np.float64
                all_unweighted_scores[:,i] = y_score.squeeze()
    
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new:
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            all_scores[:,i] = y_score.squeeze()
    
        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)
    
        # Sort these scores:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.zeros((np.shape(X_AUC)[0],))
        for k in range(np.shape(X_AUC)[0]):
            X_new_final_scores_ext[k] = np.matmul(new_unweighted_validation_scores[k,:],np.transpose(curr_w_ext[k,:]))
        
        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
    
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        #Check if this is the first time through:
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
            for k in range(np.shape(X_so_far)[0]):
                all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])
        
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)

        #Dealing with q_tau_tm1
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
        for k in range(np.shape(X_so_far)[0]):
            all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
        
    
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Calculate column averages for each array
avg_LODA_1p5 = np.mean(all_LODA_AUC, axis=0)
avg_active_LODA_1p5 = np.mean(all_active_LODA_AUC, axis=0)
avg_GLAD_1p5 = np.mean(all_GLAD_AUC, axis=0)

avg_nY1_LODA_1p5 = np.mean(all_nY1_LODA, axis=0)
avg_nY1_active_LODA_1p5 = np.mean(all_nY1_active_LODA, axis=0)
avg_nY1_GLAD_1p5 = np.mean(all_nY1_GLAD, axis=0)


#Save these results:
np.savez("AUC_1p5.npz",
         avg_LODA_1p5 = avg_LODA_1p5,
         avg_active_LODA_1p5 = avg_active_LODA_1p5,
         avg_GLAD_1p5 = avg_GLAD_1p5,
         avg_nY1_LODA_1p5 = avg_nY1_LODA_1p5,
         avg_nY1_active_LODA_1p5 = avg_nY1_active_LODA_1p5,
         avg_nY1_GLAD_1p5 = avg_nY1_GLAD_1p5,
        )

# Save also the score matrices for later (for the plot below)
np.savez("scores_1p5.npz",
         weighted_scores_1p5 = weighted_scores,
         weighted_validation_scores_1p5 = weighted_validation_scores,
         X_new_final_scores_ext_1p5 = X_new_final_scores_ext,
        )

### Load AUC and anomaly count data and plot it

In [None]:
# Averaged curves for our method (AAA), read back from its own archive.
data_1p5_onlyAAA = np.load("AUC_1p5_onlyAAA.npz")
avg_AAA_1p5, avg_nY1_AAA_1p5 = (
    data_1p5_onlyAAA[key] for key in ("avg_AAA_1p5", "avg_nY1_AAA_1p5")
)


# Averaged curves for LODA, Active-LODA and GLAD.
data_1p5 = np.load("AUC_1p5.npz")
avg_LODA_1p5, avg_active_LODA_1p5, avg_GLAD_1p5 = (
    data_1p5[key]
    for key in ("avg_LODA_1p5", "avg_active_LODA_1p5", "avg_GLAD_1p5")
)
avg_nY1_LODA_1p5, avg_nY1_active_LODA_1p5, avg_nY1_GLAD_1p5 = (
    data_1p5[key]
    for key in ("avg_nY1_LODA_1p5", "avg_nY1_active_LODA_1p5", "avg_nY1_GLAD_1p5")
)

In [None]:
# Batch numbers start at 1. The Active-LODA curve is plotted against the
# first int(n_loops/2) batches only.
xx = [batch_no for batch_no in range(1, n_loops + 1)]
xxactiveLODA = xx[:int(n_loops/2)]

# (x values, averaged AUC curve, legend label, marker) for each method.
auc_curves = (
    (xx, avg_LODA_1p5, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_1p5, "Active-LODA", 's'),
    (xx, avg_GLAD_1p5, "GLAD", '^'),
    (xx, avg_AAA_1p5, "AAA", 'd'),
)

plt.figure(figsize=(10, 6))
for batches, curve, method, symbol in auc_curves:
    plt.plot(batches, curve, label=method, marker=symbol)

# Axis labels, one tick per batch, legend and grid.
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

### Plot of cumulative number of anomalies detected

In [None]:
# Batch numbers start at 1. The Active-LODA curve is plotted against the
# first int(n_loops/2) batches only.
xx = [batch_no for batch_no in range(1, n_loops + 1)]
xxactiveLODA = xx[:int(n_loops/2)]

# (x values, averaged labeled-anomaly count, legend label, marker) per method.
count_curves = (
    (xx, avg_nY1_LODA_1p5, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_1p5, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_1p5, "GLAD", '^'),
    (xx, avg_nY1_AAA_1p5, "AAA", 'd'),
)
for batches, counts, method, symbol in count_curves:
    plt.plot(batches, counts, label=method, marker=symbol)

# Axis labels, title, legend and a light dashed grid.
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

### Cumulative percentage of queries which are true anomalies over time

In [None]:
# Divide the averaged cumulative anomaly count at batch index i by 100 + 5 * i.
# NOTE(review): presumably 100 is the number of initially labeled points and 5
# the number of queries per batch. If the counts are recorded after each
# batch's queries are added (as in the c=1 AAA cell, all_nY1_AAA[curr_trial, r]),
# the labeled total at index i would be initial + per-batch * (i + 1); confirm
# that 100 + 5 * i is the intended denominator.
transformed_values = [
    avg_nY1_AAA_1p5[i] / (100 + 5 * i) for i in range(n_loops)
]

# Batch numbers on the x-axis, starting at 1.
xx = list(range(1, n_loops + 1))

# Single red curve of the fraction over batches.
plt.figure(figsize=(10, 6))
plt.plot(xx, transformed_values, marker='o', linestyle='-', color='red')

plt.xlabel("Batch", fontsize=14)
plt.ylabel("Fraction of labeled data that is an anomaly", fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

### Fitting functions to this curve

In [None]:
from scipy.optimize import curve_fit

# Same transformation as in the previous cell: averaged cumulative anomaly
# count at batch index i divided by 100 + 5 * i.
transformed_values = [
    avg_nY1_AAA_1p5[i] / (100 + 5 * i) for i in range(n_loops)
]

# Batch numbers, starting at 1, used as the x data for the fits below.
xx = list(range(1, n_loops + 1))

# Logarithmic fit function
def log_func(x, a, b):
    """Evaluate the logarithmic model ``a * ln(x) + b``.

    ``x`` may be a scalar or array-like; ``a`` is the slope on ``ln(x)`` and
    ``b`` the offset. Used as the model function passed to ``curve_fit``.
    """
    log_term = np.log(x)
    return b + a * log_term

# Hyperbolic fit function
def hyperbola_func(x, a, b, c):
    """Evaluate the hyperbolic model ``a / (x + b) + c``.

    ``b`` shifts the pole along the x-axis and ``c`` is the horizontal
    asymptote. Used as the model function passed to ``curve_fit``.
    """
    shifted = x + b
    return c + a / shifted

# Least-squares fits of both candidate models to the transformed values.
# maxfev is raised to 10000 function evaluations for each fit.
fit_options = {"maxfev": 10000}
popt_log, _ = curve_fit(log_func, xx, transformed_values, **fit_options)
popt_hyper, _ = curve_fit(hyperbola_func, xx, transformed_values, **fit_options)

# Evaluate each fitted model at the batch numbers for plotting.
batch_axis = np.array(xx)
log_fit_values = log_func(batch_axis, *popt_log)
hyper_fit_values = hyperbola_func(batch_axis, *popt_hyper)

plt.figure(figsize=(10, 6))

# Data in solid red, the two fits as dashed lines; the fitted parameters are
# shown in the legend labels.
plt.plot(xx, transformed_values, marker='o', linestyle='-', color='red', label="Transformed Values")
plt.plot(
    xx,
    log_fit_values,
    linestyle='--',
    color='blue',
    label=f"Logarithmic Fit: y = {popt_log[0]:.2f}*ln(x) + {popt_log[1]:.2f}",
)
plt.plot(
    xx,
    hyper_fit_values,
    linestyle='--',
    color='green',
    label=f"Hyperbolic Fit: y = {popt_hyper[0]:.2f}/(x + {popt_hyper[1]:.2f}) + {popt_hyper[2]:.2f}",
)

plt.xlabel("Batch", fontsize=14)
plt.ylabel("Fraction of labeled data that is an anomaly", fontsize=14)
plt.title("Fitting Transformed Values with Logarithmic and Hyperbolic Functions", fontsize=16)

plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## $c=1$

### Our method

In [None]:
# Fix NumPy's global random seed so that np.random draws in the following cell
# (e.g. the label-masking permutation) are repeatable across runs.
np.random.seed(123456789)

In [None]:
############################################################################
# Two-dimensional Gaussian nominals with one two-dimensional Gaussian anomaly 
############################################################################

# Both coordinates of the anomaly mean are set to c (see a_list below).
c = 1

# Per-trial results: one row per trial, one column per batch.
all_AAA_AUC = np.zeros((n_trials,n_loops))   # AUC on the external validation set
all_nY1_AAA = np.zeros((n_trials, n_loops))  # cumulative number of labeled anomalies

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Specific arguments:
    a_list = [[c,c]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    
    # Density plot of the sampled data, nominals and anomalies overlaid.
    plt.figure(figsize=(10, 8))
    
    # Plot density of nominals (blue)
    sns.kdeplot(x=X[Y == 0][:, 0], y=X[Y == 0][:, 1], cmap="Blues", fill=True, alpha=0.6)
    
    # Plot density of anomalies (red)
    sns.kdeplot(x=X[Y == 1][:, 0], y=X[Y == 1][:, 1], cmap="Reds", fill=True, alpha=0.6)
    
    
    # Customizing the plot
    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # The legend is built by hand from two proxy lines.
    legend_elements = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies")
    ]
    
    plt.legend(handles=legend_elements, loc='upper right')
    
    plt.grid(True)
    plt.show()
    
    
    # Scatter plot of the same data.
    plt.figure(figsize=(10, 8))
    
    # Scatter plot for nominals (blue)
    plt.scatter(X[Y == 0][:, 0], X[Y == 0][:, 1], color='blue', alpha=0.6, label="Nominals")
    
    # Scatter plot for anomalies (red)
    plt.scatter(X[Y == 1][:, 0], X[Y == 1][:, 1], color='red', alpha=0.6, label="Anomalies")
    
    # Customizing the plot
    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Legend entries come from the labels of the two scatter calls.
    plt.legend(loc='upper right')
    
    plt.grid(True)
    plt.show()
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)
    
    elif n_old > 0:
        # These quantities do not depend on the random draw, so compute them once.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        #Calculate how many labels to show and how many to hide:
        n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
        n_hide_pc = (n_hide/n_old)*100

        # Sentinel values that guarantee the masking loop runs at least once.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the mask until 2 labeled anomalies or 98 labeled nominals remain.
        # NOTE(review): the loop stops as soon as EITHER target is hit. The two
        # conditions coincide only when exactly 100 labels stay visible; confirm
        # that this is the intended stopping rule. It is left unchanged so that
        # the sequence of random draws is unaffected.
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the points whose labels are hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
    
        model.fit(X_AUC)
        # Convert to float64 by value. Assigning to ``.dtype`` would reinterpret
        # the raw bytes instead of converting, and NumPy discourages it.
        y_score = np.asarray(model.score_samples(X_AUC), dtype=np.float64)
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # Our trials: 
    
    #Choose a supervised method that will be applied:
    supervised_method = 'LogisticRegression'

    #Initialization

    #Dealing with edge cases:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods
    
    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)


    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 1,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=tau,
        )
        
        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1
        
        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model was returned for this batch, record the chance-level value 0.5.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            curr_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            curr_auc = 0.5
        AAA_AUC[r] = curr_auc
        all_AAA_AUC[curr_trial,r] = curr_auc
        
        #Update indices:
        curr_L_index = curr_L_index + B

# Average over trials (axis 0), leaving one value per batch.
avg_AAA_1 = all_AAA_AUC.mean(axis=0)
avg_nY1_AAA_1 = all_nY1_AAA.mean(axis=0)


# Persist the averaged curves together with Y_AUC, the labels of the external
# validation set as left by the last trial (these are labels, not AUC values).
averaged_results_1 = {
    "avg_AAA_1": avg_AAA_1,
    "avg_nY1_AAA_1": avg_nY1_AAA_1,
    "Y_AUC_1_onlyAAA": Y_AUC,
}
np.savez("AUC_1_onlyAAA.npz", **averaged_results_1)


# Also keep the most recently computed validation-set predictions for a later plot.
np.savez("scores_1_onlyAAA.npz", **{"new_preds_1": new_preds})

### The other three methods

In [None]:
# Seed NumPy's global random generator so that the np.random calls below
# (e.g. the label-masking permutation) are repeatable.
# NOTE(review): this presumably does not seed the neural-network backend used
# by GLAD -- confirm if exact reproducibility of the GLAD curves is needed.
np.random.seed(123456789)

In [None]:
############################################################################
# Two-dimensional Gaussian nominals with one two-dimensional Gaussian anomaly
############################################################################

# Coordinate used for both components of the anomaly mean, i.e. the mean is (c, c):
c = 1

# Result tables with one row per trial and one column per batch.
# Active-LODA is only run for the first int(n_loops/2) batches, so its
# tables are half as wide.
all_LODA_AUC = np.zeros((n_trials, n_loops))
all_GLAD_AUC = np.zeros_like(all_LODA_AUC)
all_active_LODA_AUC = np.zeros((n_trials, int(n_loops/2)))

# Number of labeled anomalies collected so far, same layout as above:
all_nY1_LODA = np.zeros_like(all_LODA_AUC)
all_nY1_GLAD = np.zeros_like(all_LODA_AUC)
all_nY1_active_LODA = np.zeros_like(all_active_LODA_AUC)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials)

    # Mixture specification: a standard 2D Gaussian for the nominals and a
    # single tight Gaussian centred at (c, c) for the anomalies.
    nominal_mean = np.array([0,0])
    nominal_cov = np.array([[1,0],[0,1]])
    a_list = [[c,c]]
    anomaly_cov_list = [[[0.1,0],[0,0.1]]]
    L = len(nominal_mean)   # Dimensionality

    # Arguments shared by the two draws below (same mixture distribution):
    mixture_kwargs = dict(
        tau=tau,                              # Fraction of anomalies
        a_list=a_list,                        # Anomaly means
        anomaly_cov_list=anomaly_cov_list,    # Anomaly covariance matrices
        nominal_mean=nominal_mean,            # Nominal mean
        nominal_cov=nominal_cov,              # Nominal covariance matrix
        L=L,                                  # Dimensionality
    )

    # Experiment data: n_old initial points followed by n_loops batches of size B.
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        **mixture_kwargs
    )

    # Massive external data-set from the same mixture distribution to look at AUC
    # (5000 points, no batches).
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=5000,
        B=0,
        n_loops=0,
        **mixture_kwargs
    )
    
    
    # Diagnostic figures for this trial's sample: a density plot and a scatter
    # plot, each showing nominals in blue and anomalies in red.
    # Each entry: (points of the class, KDE colormap, line/marker colour, legend label)
    class_views = (
        (X[Y == 0], "Blues", 'blue', "Nominals"),
        (X[Y == 1], "Reds", 'red', "Anomalies"),
    )

    # --- Figure 1: kernel density estimate per class ---
    plt.figure(figsize=(10, 8))
    for class_pts, class_cmap, _, _ in class_views:
        sns.kdeplot(x=class_pts[:, 0], y=class_pts[:, 1], cmap=class_cmap, fill=True, alpha=0.6)

    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # Build the legend by hand from proxy line handles, one per class:
    legend_elements = [
        Line2D([0], [0], color=class_colour, lw=2, label=class_label)
        for _, _, class_colour, class_label in class_views
    ]
    plt.legend(handles=legend_elements, loc='upper right')

    plt.grid(True)
    plt.show()

    # --- Figure 2: raw scatter per class ---
    plt.figure(figsize=(10, 8))
    for class_pts, _, class_colour, class_label in class_views:
        plt.scatter(class_pts[:, 0], class_pts[:, 1], color=class_colour, alpha=0.6, label=class_label)

    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # Legend entries come from the labels given to the scatter calls:
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()
    
    
    # Masking the old data, if it exists. Y_muted keeps the true label of the
    # initial points whose label is "known" and holds NaN for the hidden ones.
    if n_old == 0:
        print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)

    elif n_old > 0:
        # Quantities that do not depend on the random draw:
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        #How many labels to hide; if u > 0 then at least one label will be shown
        #due to the ceiling function.
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        # Redraw the mask until 2 labeled anomalies or 98 labeled nominals remain.
        # NOTE(review): the targets 2 and 98 are hard-coded; presumably exactly 100
        # labels are shown so both conditions coincide -- confirm against u and
        # n_old, otherwise this loop may stop early or never terminate.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while n_remaining_anomalies != 2 and n_remaining_nominals != 98:
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')

        #Get the values of X corresponding to the unmuted anomalies (for the
        #accepted mask, i.e. the last one drawn):
        X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this (or all of X if it
    #has fewer than n_min rows). LODA_Choose_M returns the dictionary of models,
    #the number of projections best_m that was selected, and a scores object
    #that is not used below.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    #one column per model, one row per validation point.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
    
        #Each model is re-fitted on the validation data before scoring it.
        #NOTE(review): the same model objects are re-fitted on every batch later
        #on -- presumably fit() keeps the projection and only re-estimates the
        #density; confirm in acanag.
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        #This reinterprets the buffer in place rather than converting values;
        #it is a no-op only if y_score is already float64 -- TODO confirm.
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS
    #
    # Plain LODA averages the scores of all projections with equal weights and
    # never uses labels. Its score on the external validation set is therefore the
    # same for every batch, so the AUC is computed once. The batch loop is still
    # needed to count how many true anomalies land in the top n_send scores.

    curr_L_index = n_old

    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    # Batch-independent validation AUC, stored for every batch of this trial:
    loda_auc = roc_auc_score(Y_AUC,weighted_scores)
    LODA_AUC = [loda_auc]*n_loops
    all_LODA_AUC[curr_trial,:] = loda_auc

    # Start from the labels that are already known in the initial data:
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    #The whole expert budget goes to the highest-scoring points:
    top_k = n_send

    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        curr_L_index = curr_L_index + B

        #Unweighted score of every projection on the new batch.
        #The models are re-fitted on each batch before scoring it.
        new_unweighted_scores = np.empty((B,best_m))

        for i, (name, model) in enumerate(models.items()):

            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #Send the top_k highest-scoring points of the batch to the expert and
        #record how many labeled anomalies have been collected so far:
        sorted_indices = np.argsort(new_LODA_scores)
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab.extend(expert_provided_labels)
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1

    #####################################################################################################################
    # ACTIVE-LODA trials:

    #We have to provide C_tau since it needs to be used before the optimization
    #function. Das et al. (2016) suggest C_tau = 0.03 as a default; here it is set
    #to the mixture parameter tau instead.
    C_tau = tau

    #There are hyperparameters that need to be set in advance for this algorithm. However,
    #for simplicity we assume they take the default values in the optimization function.
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article

    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the
    #items with highest predicted scores:
    top_k = n_send

    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.

    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.full(best_m, 1/best_m)

    #We also initialize arrays to put the unweighted scores of labeled data into
    #(H_A for labeled anomalies, H_N for labeled nominals):
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))

    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0

    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label).
    if n_old > 0:
        labeled_nominal_mask = (Y_muted == 0)
        labeled_anomaly_mask = (Y_muted == 1)
        has_labeled_nominals = np.sum(labeled_nominal_mask) > 0
        has_labeled_anomalies = np.sum(labeled_anomaly_mask) > 0

        if has_labeled_nominals or has_labeled_anomalies:
            #Extract the initial data from X:
            X_new = X[:n_old,:]

            #Calculate the unweighted scores for each LODA projector on the initial data.
            #This is needed by BOTH labeled cases below. Previously the "no labeled
            #nominals" case indexed whatever scores were left over from the last LODA
            #batch, which have B rows rather than n_old.
            new_unweighted_scores = np.empty((n_old,best_m))

            for i, (name, model) in enumerate(models.items()):

                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()

        #If there is at least one labeled nominal (following Das et al. (2016)):
        if has_labeled_nominals:

            #Calculate the sum of the linear combination of these scores weighted by w_old:
            new_scores = np.matmul(new_unweighted_scores,w_old)

            #Sort new_scores from smallest to largest, whilst retaining the indices.
            sorted_indices = np.argsort(new_scores)
            sorted_scores = new_scores[sorted_indices]

            #Calculate q_tau on this initial data:
            #WARNING: one of the underlying problems with active-LODA is that it basically expects
            #anomalies to have the highest scores from the get go. But here, in this first loop,
            #it very well could be that the anomalies have all the LOWEST scores. The calculation
            #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
            #expects positive weights, especially in its minimization step, with its L2 norm penalty
            #on the weights. This means that active-LODA does not reach its true potential, as
            #defined and coded by Das et al. (2016). The trouble is that without a penalty on
            #making the weights not too big, not too small, and not necessarily positive, it will
            #remain suboptimal.
            my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
            q_tau = sorted_scores[my_quantile_sorted_index]

            #Create the arrays H_A and H_N:
            H_N = new_unweighted_scores[labeled_nominal_mask,:]
            if has_labeled_anomalies:
                H_A = new_unweighted_scores[labeled_anomaly_mask,:]
            else:
                #In Das et al. (2016) they allow for the case that there
                #are no labeled anomalies, only labeled nominals.
                print('There were no labeled anomalies in the old data')
                #Set a "fake anomaly alert to 1":
                fake_anomaly = 1
                #We take the data point corresponding to the score sorted into the
                #quantile position as a proxy for an anomaly.
                H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)

            #Update the linear combination coefficients and rescale them to unit L2 norm:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            w_old = w_new / np.linalg.norm(w_new)

        #Else if there were initial data but no labeled nominals: Das et al. (2016)
        #provide no details for what to do here. It is possible though unlikely that
        #in the initial data, there were no labeled nominals but there was at least
        #one labeled anomaly. We still need to initialize H_A in this case, even if
        #H_N is empty. Indexing rows with a boolean mask always returns a 2D array,
        #so a single anomaly already has shape (1, best_m).
        #NOTE(review): q_tau is not set on this path (nor when n_old == 0), yet the
        #batch loop passes q_tau to the optimizer -- confirm the intended initial value.
        elif has_labeled_anomalies:
            H_A = new_unweighted_scores[labeled_anomaly_mask,:]
                
    #So, at this point, either we had no old data and w_old is still equal weights,
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may
    #have been added to (or not), while w_old may or may not have already been
    #updated.

    #We next move to the batch data. Active-LODA is only run on the first
    #int(n_loops/2) batches.

    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    #q_tau is only computed during initialization when there was initial data with
    #at least one labeled nominal. Otherwise mark it as missing so that the first
    #batch supplies it, instead of failing on an undefined name or silently reusing
    #the value left over from a previous trial.
    if not (n_old > 0 and np.sum(Y_muted == 0) > 0):
        q_tau = None

    for r in range(int(n_loops/2)):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        #Update for the next loop:
        curr_L_index = curr_L_index + B

        #Unweighted score of every projection on the new batch:
        new_unweighted_scores = np.empty((B,best_m))

        for i, (name, model) in enumerate(models.items()):

            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Calculate the weighted scores on the external validation set with the current
        #value of w_old, and the resulting AUC:
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        curr_auc = roc_auc_score(Y_AUC,weighted_validation_scores)
        active_LODA_AUC[r] = curr_auc
        all_active_LODA_AUC[curr_trial,r] = curr_auc

        #We actually have to, at this point, attach the current versions of H_A
        #and H_N to new_unweighted_scores, since in this batch framework, we do
        #not have a fixed number of data points from the start to the finish, like
        #they do in Das et al. (2016). If we do not do this, it will affect the
        #calculation of q_tau over time (a kind of bias will be introduced, maybe
        #not the end of the world, but still.)
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])

        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #Remember that these may include items from the previous loop or from the initialization.
        #However, since we appended those on to the end, we know their indices will be B, B+1,...
        #so we will be able to look out for them.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)

        #Following the methodology in Das et al. (2016), we should provide the
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we instead provide the top_k scoring data points.
        #Walk the ranking from the highest score downwards and keep the first top_k
        #indices that belong to the new batch (index < B). At most B such indices
        #exist, so this can never run off the start of the ranking.
        descending_indices = sorted_indices[::-1]
        top_k_indices = descending_indices[descending_indices < B][:top_k]

        #Now we go and get the labels in Y_new associated with these indices:
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = list(Y_expert)
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1

        #If H_A currently only holds a "fake" (proxy) anomaly, throw it away. If the
        #expert has just found at least one real anomaly, the proxy is never needed again.
        if fake_anomaly == 1:
            H_A = np.empty((0, best_m))
            if np.any(Y_expert == 1):
                fake_anomaly = 0

        #We then need to append the relevant unweighted scores to the current H_A and H_N
        #(the order of the points within each group is the ranking order):
        expert_scores = new_unweighted_scores[top_k_indices,:]
        H_A = np.concatenate([H_A,expert_scores[Y_expert == 1]])
        H_N = np.concatenate([H_N,expert_scores[Y_expert != 1]])

        #We always have to retain the possibility that all of the labeled data so far
        #are nominals. In that case we take the data point whose score is sorted into the
        #quantile position as a proxy for an anomaly, and flag it as fake so that it is
        #discarded again on the next batch rather than being kept as a real anomaly.
        if np.shape(H_A)[0]==0:
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            fake_anomaly = 1

        #Update the linear combination coefficients. The optimizer receives the q_tau of
        #the previous step; if none exists yet, fall back to this batch's quantile.
        if q_tau is None:
            q_tau = sorted_scores[my_quantile_sorted_index]
        w_new = optimize_w_2(H_A, H_N, q_tau)

        #Update q_tau on this data:
        q_tau = sorted_scores[my_quantile_sorted_index]

        #Update w_old, rescaled to unit L2 norm:
        w_old = w_new / np.linalg.norm(w_new)

    #####################################################################################################################
    # GLAD trials:

    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    #Sentinel meaning "the quantile q_tau has not been computed yet":
    q_tau_tm1 = -10e7

    # Build the model (input size = data dimension, one output per LODA projection):
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)

    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])

    # Train the model so that every output starts close to the prior weight b.
    # We use the same n_min used earlier to find the number of LODA projections;
    # the target is sized from the slice so that it still matches if X has fewer
    # than n_min rows.
    X_pretrain = X[:n_min,:]
    y_true = np.full((np.shape(X_pretrain)[0], best_m), b)
    model_GLAD.fit(X_pretrain, y_true, epochs=10, batch_size=32,verbose=0)

    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2]
    output = model_GLAD.predict(row)


    #INITIALIZATION
    #Default state: nothing is labeled and no scores have been computed yet.
    X_lab = np.empty([0,np.shape(X)[1]])
    Y_lab = []
    all_labeled_scores = np.empty([0,best_m])
    all_unweighted_scores = np.empty([0,best_m])

    if n_old != 0:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]

        #Score ALL of the initial data, labeled or not: in the batch loop the rows of
        #all_unweighted_scores are matched one-to-one with the rows of X_so_far, which
        #starts with these n_old points. (Previously the scores were left empty when
        #no initial label was available, which broke that alignment.)
        all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_old)
            y_score = model.score_samples(X_old)
            y_score.dtype = np.float64
            all_unweighted_scores[:,i] = y_score.squeeze()

        if len(which_lab) != 0:
            #Keep the labeled initial points, their labels and their scores:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #For every batch: score it with the LODA projections, weight the scores with the
    #current network output, send the top_k points to the expert, evaluate the AUC on
    #the external validation set, then retrain the network on the labeled data.
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new (one column per LODA projection):
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            all_scores[:,i] = y_score.squeeze()
    
        #Update the running table of scores; its rows follow the order of X_so_far below:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score of each point = sum over projections of (score * weight for that point):
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)
    
        # Sort these scores (ascending) and keep the top_k highest-scoring points:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        # Score the external validation set with the current network weights
        # (row-by-row dot product of validation scores and predicted weights):
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.zeros((np.shape(X_AUC)[0],))
        for k in range(np.shape(X_AUC)[0]):
            X_new_final_scores_ext[k] = np.matmul(new_unweighted_validation_scores[k,:],np.transpose(curr_w_ext[k,:]))
        
        # NOTE(review): the AUC is computed twice with identical arguments, and the
        # fpr/tpr/roc_auc values below are not used anywhere in this loop.
        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
    
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        #Check if this is the first time through (q_tau_tm1 still holds its sentinel):
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1: compute the quantile of the final scores of all
            #data seen so far, using the network as it is BEFORE this batch's training.
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
            for k in range(np.shape(X_so_far)[0]):
                all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        #    so that every labeled anomaly appears five times in the training data.
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])
        
        # Re-compile with a loss that ignores the (y_true, y_pred) arguments it is
        # given and instead evaluates new_custom_loss_2 on the arrays captured here.
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)

        #Dealing with q_tau_tm1: recompute the quantile with the freshly trained
        #network; this value is the one used by the loss in the next batch.
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
        for k in range(np.shape(X_so_far)[0]):
            all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
        
    
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average over trials (rows), leaving one value per batch (column).
# First the AUC tables:
avg_LODA_1, avg_active_LODA_1, avg_GLAD_1 = (
    np.mean(per_trial, axis=0)
    for per_trial in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

# Then the counts of labeled anomalies:
avg_nY1_LODA_1, avg_nY1_active_LODA_1, avg_nY1_GLAD_1 = (
    np.mean(per_trial, axis=0)
    for per_trial in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)


#Save these results:
np.savez("AUC_1.npz", **{
    "avg_LODA_1": avg_LODA_1,
    "avg_active_LODA_1": avg_active_LODA_1,
    "avg_GLAD_1": avg_GLAD_1,
    "avg_nY1_LODA_1": avg_nY1_LODA_1,
    "avg_nY1_active_LODA_1": avg_nY1_active_LODA_1,
    "avg_nY1_GLAD_1": avg_nY1_GLAD_1,
})


# Save also the validation-set scores of each method for later (for the plot
# below). These are the values left over from the last batch of the last trial.
np.savez("scores_1.npz", **{
    "weighted_scores_1": weighted_scores,
    "weighted_validation_scores_1": weighted_validation_scores,
    "X_new_final_scores_ext_1": X_new_final_scores_ext,
})

### Load the AUC and anomaly count data and plot it

In [None]:
# Our method: averaged AUC curve, averaged labeled-anomaly counts, and the
# validation labels stored alongside them.
data_1_onlyAAA = np.load("AUC_1_onlyAAA.npz")
avg_AAA_1, avg_nY1_AAA_1, Y_AUC_1 = (
    data_1_onlyAAA[key]
    for key in ("avg_AAA_1", "avg_nY1_AAA_1", "Y_AUC_1_onlyAAA")
)


# The competing methods: averaged AUC curves followed by averaged anomaly counts.
data_1 = np.load("AUC_1.npz")
avg_LODA_1, avg_active_LODA_1, avg_GLAD_1 = (
    data_1[key]
    for key in ("avg_LODA_1", "avg_active_LODA_1", "avg_GLAD_1")
)
avg_nY1_LODA_1, avg_nY1_active_LODA_1, avg_nY1_GLAD_1 = (
    data_1[key]
    for key in ("avg_nY1_LODA_1", "avg_nY1_active_LODA_1", "avg_nY1_GLAD_1")
)

In [None]:
# 1-based batch numbers for the x-axis. Active-LODA is drawn against the first
# n_loops/2 batches only, so it gets its own shorter axis.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, n_loops // 2 + 1))

plt.figure(figsize=(10, 6))

# (x values, averaged AUC curve, legend label, marker) for every method.
auc_curves = (
    (xx, avg_LODA_1, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_1, "Active-LODA", 's'),
    (xx, avg_GLAD_1, "GLAD", '^'),
    (xx, avg_AAA_1, "AAA", 'd'),
)
for batch_numbers, mean_auc, method_label, marker_style in auc_curves:
    plt.plot(batch_numbers, mean_auc, label=method_label, marker=marker_style)

# Axis labels, one tick per batch, legend and grid.
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

In [None]:
# 1-based batch numbers; Active-LODA only spans the first n_loops/2 of them.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, n_loops // 2 + 1))

# (x values, averaged labeled-anomaly counts, legend label, marker) per method.
count_curves = (
    (xx, avg_nY1_LODA_1, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_1, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_1, "GLAD", '^'),
    (xx, avg_nY1_AAA_1, "AAA", 'd'),
)
for batch_numbers, mean_counts, method_label, marker_style in count_curves:
    plt.plot(batch_numbers, mean_counts, label=method_label, marker=marker_style)

# Axis labels, title, legend and a light dashed grid.
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

### Plot the densities

In [None]:
# Our method: the saved predictions on the validation set.
scores_1_onlyAAA = np.load("scores_1_onlyAAA.npz")
new_preds_1 = scores_1_onlyAAA["new_preds_1"]

# The competing methods: one saved score array each.
scores_1 = np.load("scores_1.npz")
weighted_scores_1, weighted_validation_scores_1, X_new_final_scores_ext_1 = (
    scores_1[key]
    for key in (
        "weighted_scores_1",
        "weighted_validation_scores_1",
        "X_new_final_scores_ext_1",
    )
)

In [None]:
# One panel per method; the three lists below are aligned by position.
score_arrays = [weighted_scores_1, weighted_validation_scores_1, X_new_final_scores_ext_1, new_preds_1]
titles = ["LODA", "Active-LODA", "GLAD", "AAA"]
panel_y_max = [1.55, 1.1, 61000, 11]  # hand-set upper y-limit of each panel

# Set font sizes
title_fontsize = 16
label_fontsize = 13
legend_fontsize = 14
tick_fontsize = 10

# 2x2 grid of axes, flattened so the panels can be walked in order.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
axes = axes.flatten()

# Boolean masks that split the scores by true label.
# NOTE(review): Y_AUC_1 is read from the AAA-only archive; confirm it matches the
# validation set that produced the LODA / Active-LODA / GLAD scores.
nominal_mask = Y_AUC_1 == 0
anomaly_mask = Y_AUC_1 == 1

for ax, scores, method_title, y_max in zip(axes, score_arrays, titles, panel_y_max):

    # Evaluate the densities on 200 points spanning this method's score range.
    x_vals = np.linspace(np.min(scores), np.max(scores), 200)

    nominal_scores = scores[nominal_mask]
    anomaly_scores = scores[anomaly_mask]

    # A class is only drawn when it has more than one score to fit a KDE on.
    # The nominal curve is weighted by (1 - tau).
    if len(nominal_scores) > 1:
        nominal_density = gaussian_kde(nominal_scores)(x_vals) * (1 - tau)
        ax.plot(x_vals, nominal_density, label="Nominal (Y=0)", color="blue")

    # The anomaly curve is weighted by tau and additionally multiplied by 10
    # (presumably to keep it visible next to the nominal curve -- TODO confirm).
    if len(anomaly_scores) > 1:
        anomaly_density = 10 * gaussian_kde(anomaly_scores)(x_vals) * tau
        ax.plot(x_vals, anomaly_density, label="Anomaly (Y=1)", color="red")

    # Titles, labels, legend, grid and tick sizes.
    ax.set_title(f"{method_title} (c=1)", fontsize=title_fontsize)
    ax.set_xlabel("Score", fontsize=label_fontsize)
    ax.set_ylabel("Aggregated score 'density'", fontsize=label_fontsize)
    ax.legend(fontsize=legend_fontsize)
    ax.grid(True)
    ax.tick_params(axis='both', labelsize=tick_fontsize)

    # Panel-specific vertical range.
    ax.set_ylim(0, y_max)

# Tighten the layout, display, then export the figure as a PDF.
plt.tight_layout()
plt.show()

fig.savefig("ScoreDensities_1.pdf", format="pdf", bbox_inches="tight")

## $c=0.5$

### Our method

In [None]:
# Reseed NumPy's global random generator so the c = 0.5 AAA run below starts from a
# fixed state (np.random.permutation in that cell draws from this generator).
np.random.seed(123456789)

In [None]:
############################################################################
# Two-dimensional Gaussian nominals with one two-dimensional Gaussian anomaly 
############################################################################
#
# AAA-only experiment for c = 0.5. For each trial this cell:
#   1. samples the streaming data (X, Y) and a separate validation set (X_AUC, Y_AUC),
#   2. plots the sampled data,
#   3. masks most labels of the initial n_old points,
#   4. fits the LODA detectors and scores the validation set with them,
#   5. runs InitActiveAGG / ActiveAGG batch by batch, recording the validation AUC and
#      the running count of labeled anomalies.
# Afterwards the per-batch averages over trials and the last validation predictions
# are written to .npz files.
#
# NOTE: the order of the calls that draw random numbers is deliberately kept as is,
# so results obtained with the fixed seed stay reproducible.

c = 0.5

# One row per trial, one column per batch.
all_AAA_AUC = np.zeros((n_trials,n_loops))   # validation AUC after each batch
all_nY1_AAA = np.zeros((n_trials, n_loops))  # number of labeled anomalies after each batch

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Specific arguments:
    a_list = [[c,c]]                        # Anomaly means
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]                                       # Anomaly covariance matrices
    nominal_mean = np.array([0,0])          # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)                   # Dimensionality

    # Mixture description shared by both sample_data calls below, so the streaming
    # data and the validation data cannot drift apart.
    mixture_kwargs = dict(
        tau=tau,                            # Fraction of anomalies
        a_list=a_list,
        anomaly_cov_list=anomaly_cov_list,
        nominal_mean=nominal_mean,
        nominal_cov=nominal_cov,
        L=L,
    )

    # Sampling: n_old initial points followed by n_loops batches of size B.
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        **mixture_kwargs
    )

    # Massive external data-set from the same mixture distribution to look at AUC
    # (5000 points, no batches).
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,
        B=0,
        n_loops=0,
        **mixture_kwargs
    )

    # Split once by true label; both diagnostic plots below reuse these views.
    nominal_pts = X[Y == 0]
    anomaly_pts = X[Y == 1]

    # --- Diagnostic plot 1: 2D density of nominals (blue) and anomalies (red) ---
    plt.figure(figsize=(10, 8))
    sns.kdeplot(x=nominal_pts[:, 0], y=nominal_pts[:, 1], cmap="Blues", fill=True, alpha=0.6)
    sns.kdeplot(x=anomaly_pts[:, 0], y=anomaly_pts[:, 1], cmap="Reds", fill=True, alpha=0.6)

    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # The legend is built by hand from proxy lines.
    legend_elements = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies")
    ]
    plt.legend(handles=legend_elements, loc='upper right')

    plt.grid(True)
    plt.show()

    # --- Diagnostic plot 2: scatter of nominals (blue) and anomalies (red) ---
    plt.figure(figsize=(10, 8))
    plt.scatter(nominal_pts[:, 0], nominal_pts[:, 1], color='blue', alpha=0.6, label="Nominals")
    plt.scatter(anomaly_pts[:, 0], anomaly_pts[:, 1], color='red', alpha=0.6, label="Anomalies")

    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    plt.legend(loc='upper right')  # Legend entries come from the scatter labels
    plt.grid(True)
    plt.show()

    # Masking the old data, if it exists:
    if n_old == 0:
        print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)

    if n_old > 0:
        # These quantities do not depend on the random mask, so compute them once.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # Number of labels to hide. The ceiling rounds the number of HIDDEN labels up,
        # so the number of labels that stay visible is rounded down.
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        # Redraw the random mask while neither target count (2 labeled anomalies,
        # 98 labeled nominals) has been reached; the loop stops as soon as either
        # count matches. The start values only guarantee that the loop is entered.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while n_remaining_anomalies != 2 and n_remaining_nominals != 98:
            # The first n_hide entries of a random permutation are the points whose
            # labels get masked.
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            # Y_muted: float copy of the initial labels, NaN where the label is hidden.
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            # Rows of X whose anomaly label is still visible:
            X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]

    # Getting the LODA models and the scores on the external validation set:
    # simply use the first n_min data-points in X (or all of X if it is shorter).
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)

    # Unweighted scores on the external validation set: one column per detector.
    # Each model is refitted on X_AUC before scoring it.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, model in enumerate(models.values()):
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        # NOTE(review): assigning .dtype reinterprets the existing buffer instead of
        # converting values; presumably the scores are already float64 -- confirm.
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()

    #####################################################################################################################
    # Our trials: 

    # Supervised method applied on top of the detector scores:
    supervised_method = 'LogisticRegression'

    # Initialization, dealing with the no-initial-data edge case:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  # We directly use Y_muted, as in all the other methods

    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)

    # [curr_L_index, curr_R_index) is the slice of X holding the current batch.
    curr_L_index = n_old

    # Per-trial AUC trace (the same values are also written into all_AAA_AUC).
    AAA_AUC = [0]*n_loops

    # Loop through batches of new data of size B:
    for r in range(n_loops): 

        curr_R_index = curr_L_index + B

        # Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)  # number of true anomalies in this batch

        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 0.4,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=tau,
        )

        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1

        # Test the current learned model on the external data in order to calculate
        # the AUC. Without a learned model the batch is recorded at chance level (0.5).
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            curr_AUC = roc_auc_score(Y_AUC,new_preds)  # computed once, stored twice
        else:
            curr_AUC = 0.5
        AAA_AUC[r] = curr_AUC
        all_AAA_AUC[curr_trial,r] = curr_AUC

        # Update indices: the next batch starts where this one ended.
        curr_L_index = curr_R_index

# Calculate column averages for each array (mean over trials, per batch)
avg_AAA_0p5 = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_0p5 = np.mean(all_nY1_AAA, axis=0)

#Save these results:
np.savez("AUC_0p5_onlyAAA.npz",
         avg_AAA_0p5 = avg_AAA_0p5,
         avg_nY1_AAA_0p5 = avg_nY1_AAA_0p5,
         )

# Save also the score matrices for later (for the plot below).
# new_preds holds the validation predictions from the most recent batch in which a
# model was learned.
np.savez("scores_0p5_onlyAAA.npz",
         new_preds_0p5 = new_preds,
        )


### The other methods

In [None]:
# Reseed NumPy's global random generator with the same value used before the AAA-only
# c = 0.5 run, so this cell also starts from a fixed state.
np.random.seed(123456789)

In [None]:
############################################################################
# Two-dimensional Gaussian nominals with one two-dimensional Gaussian anomaly 
############################################################################

c = 0.5

# Result matrices, one row per trial. LODA and GLAD get one column per batch;
# each generator below yields a fresh zero matrix for every name it is unpacked into.
all_LODA_AUC, all_GLAD_AUC, all_nY1_LODA, all_nY1_GLAD = (
    np.zeros((n_trials, n_loops)) for _ in range(4)
)

# Active-LODA is allotted only n_loops/2 columns.
all_active_LODA_AUC, all_nY1_active_LODA = (
    np.zeros((n_trials, n_loops // 2)) for _ in range(2)
)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Specific arguments:
    a_list = [[c,c]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    
    # Your code for plotting data
    plt.figure(figsize=(10, 8))
    
    # Plot density of nominals (blue)
    sns.kdeplot(x=X[Y == 0][:, 0], y=X[Y == 0][:, 1], cmap="Blues", fill=True, alpha=0.6)
    
    # Plot density of anomalies (red)
    sns.kdeplot(x=X[Y == 1][:, 0], y=X[Y == 1][:, 1], cmap="Reds", fill=True, alpha=0.6)
    
    
    # Customizing the plot
    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Manually create legend
    legend_elements = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies")
    ]
    
    plt.legend(handles=legend_elements, loc='upper right')  # Manually adding legend
    
    plt.grid(True)
    plt.show()
    
    
    plt.figure(figsize=(10, 8))
    
    # Scatter plot for nominals (blue)
    plt.scatter(X[Y == 0][:, 0], X[Y == 0][:, 1], color='blue', alpha=0.6, label="Nominals")
    
    # Scatter plot for anomalies (red)
    plt.scatter(X[Y == 1][:, 0], X[Y == 1][:, 1], color='red', alpha=0.6, label="Anomalies")
    
    # Customizing the plot
    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Adding legend
    plt.legend(loc='upper right')  # Automatically adding legend with labels from scatter plots
    
    # Adding grid
    plt.grid(True)
    
    # Showing the plot
    plt.show()
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of permute_indices are the points whose labels get masked:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]
            #print('The data points corresponding to unmuted anomalies are:',X_with_anomalies)
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
    
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
        
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,best_m))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)


        #############################################################
        # Sort these scores:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################
        
        #############################################################
        LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_scores)
        all_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
    #####################################################################################################################
    # ACTIVE-LODA trials:
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau
    
    #There are hyperparameters that need to be set in advance for this algorithm. However, 
    #for simplicity we assume they take the default values in the function optimize_w.
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send

    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.array([1/best_m for i in range(best_m)])
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    
    #If there were initial data and at least one labeled nominal (following Das et al. (2016))
    if n_old > 0 and np.sum(Y_muted == 0) > 0:
            
        #Extract the initial data from X:
        X_new = X[:n_old,:]
    
        #Calculate the unweighted scores for each LODA projector on the initial data: 
        new_unweighted_scores = np.empty((n_old,best_m))
    
        for i, (name, model) in enumerate(models.items()):
    
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
    
        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)
    
        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]
    
        #Calculate q_tau on this initial data: 
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first loop,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal. 
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]
    
        #Create the arrays H_A and H_N:
        #First we take H_A. In Das et al. (2016) they allow for the case that there
        #are no labeled anomalies, only labeled nominals.
        if np.sum(Y_muted == 1) == 0:
            #Set a "fake anomaly alert to 1":
            fake_anomaly = 1
            #We take the data point corresponding to the score sorted into the 
            #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            H_N = new_unweighted_scores[(Y_muted==0),:]
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
                    
        else:
            #There is at least one labeled anomaly:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            H_N = new_unweighted_scores[(Y_muted==0),:]
     
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            
            temp_unweighted = np.concatenate([H_A,H_N])
            temp_pred = np.matmul(temp_unweighted,w_new)
            temp_YA = [1]*np.shape(H_A)[0]
            temp_YN = [0]*np.shape(H_N)[0]
            temp_Y = temp_YA + temp_YN
            
        #We can now update w_old with the value of w_new. If this whole big loop wasn't run,
        #then w_old will stay at its original value.
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
    
    #Else if there were initial data but no labeled nominals: Das et al. (2016)
    #provide no details for what to do here. It is possible though unlikely that
    #in the initial data, there were no labeled nominals but there was at least
    #one labeled anomaly. We do have to check this, since we will need to initialize
    #H_A in this case, even if H_N is empty.
    elif n_old > 0 and np.sum(Y_muted == 0) == 0:
        if np.sum(Y_muted == 1) > 0:
            #initialize H_A:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            #Potential Python issue whereby if there is just one anomaly, weird
            #things happen with array shapes. To guard against this:
            if np.sum(Y_muted == 1) == 1:
                if np.shape(H_A) != (1,best_m):
                    H_A = H_A.reshape(1,-1)
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = sum(Y_new==1)
        
        #Update for the next loop:
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,best_m))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Temporary final active-LODA scores are a linear combination over anomaly detectors:
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Calculate the weighted scores on the external validation set with the current
        #value of w_old:
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
        
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        temp_Y = Y_new.tolist() + [1]*np.shape(H_A)[0] + [0]*np.shape(H_N)[0]

        #Following the methodology in Das et al. (2016), we should provide the 
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we shall instead provide the top_k scoring data
        #points to the expert, where top_k has been pre-defined.
        
        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #Remember that these may include items from the previous loop or from the initialization.
        #However, since we appended those on to the end, we know their indices will be B, B+1,...
        #so we will be able to look out for them.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]
        #print('sorted indices:',sorted_indices)
        #print('sorted scores:',sorted_scores)
    
        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)
        #print(my_quantile_sorted_index)
        
        #We now have to go through the sorted_indices from the end back towards the 
        #beginning until we manage to gather top_k indices which are less than or equal to B-1.
        top_k_indices = []
        n_indices_so_far = 0
        curr_index = len(sorted_indices)
        while n_indices_so_far < top_k:
            curr_index = curr_index - 1
            next_top_index = sorted_indices[curr_index]
            #print('next top index:',next_top_index)
            if next_top_index < B:
                top_k_indices.append(next_top_index)
                n_indices_so_far = n_indices_so_far + 1
                
        #Now we go and get the labels in Y_new associated with these indices:
        #top_k_indices = np.array(top_k_indices)
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1
        
        #Now is a good time to check whether we just found at least one real anomaly while
        #up to now we only had one fake anomaly:
        if sum(Y_expert==1) > 0 and fake_anomaly==1:
            #We now reset H_A to be empty:
            H_A = np.empty((0, best_m))
            #And we set fake_anomaly to 0 forever:
            fake_anomaly = 0
            
        if sum(Y_expert==1) == 0 and fake_anomaly==1:
            #We reset H_A back to empty again:
            H_A = np.empty((0, best_m))
                
        #We then need to append the relevant unweighted scores to the current H_A and H_N
        for j in range(top_k):
            if Y_expert[j]==1:
                H_A = np.concatenate([H_A,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
            else:
                H_N = np.concatenate([H_N,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
                
        #Here we have to check whether this was the first time H_A actually contained
        #a real anomaly or not, rather than a temporary one (if there were no labeled anomalies)    
        
        #Now since top_k > 0 we know that there is a positive number of labeled data for sure.
        #We always have to retain however the possibility that all of the labeled data so far
        #are nominals. 
        if np.shape(H_A)[0]==0:
            #We take the data point corresponding to the score sorted into the 
            #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
        else:
            w_new = optimize_w_2(H_A, H_N, q_tau)
    
                
    
        #Update q_tau on this data:
        q_tau = sorted_scores[my_quantile_sorted_index]
        
        #update w_old
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN

    #####################################################################################################################
    # GLAD trials:
    #
    # For the current trial: pre-train a network whose output is multiplied element-wise with
    # the per-detector LODA scores, then go through the data batch by batch. In every batch
    # the top_k highest-scoring new points are labeled by the (simulated) expert, the AUC on
    # the external validation set is recorded, and the network is re-trained on all labeled data.
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    #Sentinel value meaning "no quantile computed yet"; it is tested inside the batch loop.
    q_tau_tm1 = -10e7
    
    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    # Train the model
    # We use the same n_min used earlier to find the number of LODA projections:
    y_true = np.full((n_min, best_m), b)
    model_GLAD.fit(X[:n_min,:], y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Dealing with edge cases:
    if n_old == 0:
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])
    
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]
    
        #Score every old data point, whether or not any of them is labeled: the rows of
        #all_unweighted_scores must line up one-to-one with the rows of X_so_far used in
        #the batch loop below, and X_so_far always starts with the n_old old points.
        all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_old)
            y_score = model.score_samples(X_old)
            y_score.dtype = np.float64
            all_unweighted_scores[:,i] = y_score.squeeze()
    
        if len(which_lab) == 0:
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = [] 
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new (one column per LODA model):
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            all_scores[:,i] = y_score.squeeze()
    
        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score of a point = sum over detectors of (detector score * network weight):
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)
    
        # Sort these scores (ascending), so the last top_k positions are the highest scores:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        # Score the external validation set with the current network weights, using the same
        # row-wise weighted sum as for X_new above:
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores * curr_w_ext, axis=1)
        
        # Compute the validation AUC once and store it in both places:
        curr_GLAD_AUC = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        GLAD_AUC[r] = curr_GLAD_AUC
        all_GLAD_AUC[curr_trial,r] = curr_GLAD_AUC
    
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        #Check if this is the first time through (q_tau_tm1 still holds its sentinel value):
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.sum(all_unweighted_scores * all_weights_so_far, axis=1)

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])

        # The loss ignores Keras' (y_true, y_pred) arguments and is evaluated on the arrays
        # captured here, so the model is re-compiled every batch with the updated data.
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)
    
        #Dealing with q_tau_tm1: re-score everything seen so far with the re-trained network
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.sum(all_unweighted_scores * all_weights_so_far, axis=1)

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
        
    
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average every per-trial result matrix over its first axis (indexed by trial in the
# loops above), which leaves one value per batch.
avg_LODA_0p5, avg_active_LODA_0p5, avg_GLAD_0p5 = (
    np.mean(per_trial_auc, axis=0)
    for per_trial_auc in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

avg_nY1_LODA_0p5, avg_nY1_active_LODA_0p5, avg_nY1_GLAD_0p5 = (
    np.mean(per_trial_nY1, axis=0)
    for per_trial_nY1 in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)

# Write the averaged curves to disk; each array is stored under its own variable name.
averaged_curves_0p5 = {
    "avg_LODA_0p5": avg_LODA_0p5,
    "avg_active_LODA_0p5": avg_active_LODA_0p5,
    "avg_GLAD_0p5": avg_GLAD_0p5,
    "avg_nY1_LODA_0p5": avg_nY1_LODA_0p5,
    "avg_nY1_active_LODA_0p5": avg_nY1_active_LODA_0p5,
    "avg_nY1_GLAD_0p5": avg_nY1_GLAD_0p5,
}
np.savez("AUC_0p5.npz", **averaged_curves_0p5)

# Also keep the most recent validation-score vectors of the three methods
# (for the plot further below).
last_scores_0p5 = {
    "weighted_scores_0p5": weighted_scores,
    "weighted_validation_scores_0p5": weighted_validation_scores,
    "X_new_final_scores_ext_0p5": X_new_final_scores_ext,
}
np.savez("scores_0p5.npz", **last_scores_0p5)

### Load the AUC and anomaly count data and plot it

In [None]:
# Load the averaged AUC / anomaly-count curves saved by the c = 0.5 runs.
# np.load on an .npz file returns an archive object that holds the file open, so each
# archive is read inside a `with` block; the extracted arrays remain usable afterwards.

# Ours
with np.load("AUC_0p5_onlyAAA.npz") as data_0p5_onlyAAA:
    avg_AAA_0p5 = data_0p5_onlyAAA["avg_AAA_0p5"]
    avg_nY1_AAA_0p5 = data_0p5_onlyAAA["avg_nY1_AAA_0p5"]


# The others
with np.load("AUC_0p5.npz") as data_0p5:
    avg_LODA_0p5 = data_0p5["avg_LODA_0p5"]
    avg_active_LODA_0p5 = data_0p5["avg_active_LODA_0p5"]
    avg_GLAD_0p5 = data_0p5["avg_GLAD_0p5"]
    avg_nY1_LODA_0p5 = data_0p5["avg_nY1_LODA_0p5"]
    avg_nY1_active_LODA_0p5 = data_0p5["avg_nY1_active_LODA_0p5"]
    avg_nY1_GLAD_0p5 = data_0p5["avg_nY1_GLAD_0p5"]

In [None]:
# Batch numbers for the x-axis. Active-LODA is plotted against the first
# int(n_loops/2) batches only.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

# One entry per method: (x values, averaged AUC curve, legend label, marker)
auc_series = [
    (xx, avg_LODA_0p5, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_0p5, "Active-LODA", 's'),
    (xx, avg_GLAD_0p5, "GLAD", '^'),
    (xx, avg_AAA_0p5, "AAA", 'd'),
]

plt.figure(figsize=(10, 6))
for batches, auc_curve, method_label, marker_style in auc_series:
    plt.plot(batches, auc_curve, label=method_label, marker=marker_style)

# Axis labels, one tick per batch, legend and grid
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

### Plot cumulative number of anomalies detected

In [None]:
# Batch numbers for the x-axis. Active-LODA is plotted against the first
# int(n_loops/2) batches only.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

# One entry per method: (x values, averaged labeled-anomaly count, legend label, marker)
nY1_series = [
    (xx, avg_nY1_LODA_0p5, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_0p5, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_0p5, "GLAD", '^'),
    (xx, avg_nY1_AAA_0p5, "AAA", 'd'),
]
for batches, nY1_curve, method_label, marker_style in nY1_series:
    plt.plot(batches, nY1_curve, label=method_label, marker=marker_style)

# Axis labels and title
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")

# Legend and a light dashed grid
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## Bring back saved variables and plot them

In [None]:
# Load the averaged curves saved for each value of c (2, 1.5, 1 and 0.5).
# np.load on an .npz file returns an archive object that holds the file open, so each
# archive is read inside a `with` block; the extracted arrays remain usable afterwards.

### 2
# Ours
with np.load("AUC_2_onlyAAA.npz") as data_2_onlyAAA:
    avg_AAA_2 = data_2_onlyAAA["avg_AAA_2"]
    avg_nY1_AAA_2 = data_2_onlyAAA["avg_nY1_AAA_2"]
# The others
with np.load("AUC_2.npz") as data_2:
    avg_LODA_2 = data_2["avg_LODA_2"]
    avg_active_LODA_2 = data_2["avg_active_LODA_2"]
    avg_GLAD_2 = data_2["avg_GLAD_2"]
    avg_nY1_LODA_2 = data_2["avg_nY1_LODA_2"]
    avg_nY1_active_LODA_2 = data_2["avg_nY1_active_LODA_2"]
    avg_nY1_GLAD_2 = data_2["avg_nY1_GLAD_2"]

### 1p5
# Ours
with np.load("AUC_1p5_onlyAAA.npz") as data_1p5_onlyAAA:
    avg_AAA_1p5 = data_1p5_onlyAAA["avg_AAA_1p5"]
    avg_nY1_AAA_1p5 = data_1p5_onlyAAA["avg_nY1_AAA_1p5"]
# The others
with np.load("AUC_1p5.npz") as data_1p5:
    avg_LODA_1p5 = data_1p5["avg_LODA_1p5"]
    avg_active_LODA_1p5 = data_1p5["avg_active_LODA_1p5"]
    avg_GLAD_1p5 = data_1p5["avg_GLAD_1p5"]
    avg_nY1_LODA_1p5 = data_1p5["avg_nY1_LODA_1p5"]
    avg_nY1_active_LODA_1p5 = data_1p5["avg_nY1_active_LODA_1p5"]
    avg_nY1_GLAD_1p5 = data_1p5["avg_nY1_GLAD_1p5"]

### 1
# Ours
with np.load("AUC_1_onlyAAA.npz") as data_1_onlyAAA:
    avg_AAA_1 = data_1_onlyAAA["avg_AAA_1"]
    avg_nY1_AAA_1 = data_1_onlyAAA["avg_nY1_AAA_1"]
# The others
with np.load("AUC_1.npz") as data_1:
    avg_LODA_1 = data_1["avg_LODA_1"]
    avg_active_LODA_1 = data_1["avg_active_LODA_1"]
    avg_GLAD_1 = data_1["avg_GLAD_1"]
    avg_nY1_LODA_1 = data_1["avg_nY1_LODA_1"]
    avg_nY1_active_LODA_1 = data_1["avg_nY1_active_LODA_1"]
    avg_nY1_GLAD_1 = data_1["avg_nY1_GLAD_1"]

### 0p5
# Ours
with np.load("AUC_0p5_onlyAAA.npz") as data_0p5_onlyAAA:
    avg_AAA_0p5 = data_0p5_onlyAAA["avg_AAA_0p5"]
    avg_nY1_AAA_0p5 = data_0p5_onlyAAA["avg_nY1_AAA_0p5"]
# The others
with np.load("AUC_0p5.npz") as data_0p5:
    avg_LODA_0p5 = data_0p5["avg_LODA_0p5"]
    avg_active_LODA_0p5 = data_0p5["avg_active_LODA_0p5"]
    avg_GLAD_0p5 = data_0p5["avg_GLAD_0p5"]
    avg_nY1_LODA_0p5 = data_0p5["avg_nY1_LODA_0p5"]
    avg_nY1_active_LODA_0p5 = data_0p5["avg_nY1_active_LODA_0p5"]
    avg_nY1_GLAD_0p5 = data_0p5["avg_nY1_GLAD_0p5"]

In [None]:
# Values of c: one row of the figure per value
c_values = [0.5, 1, 1.5, 2]

# Averaged AUC curves per value of c, listed in the same order as c_values.
# Within each tuple the order is (LODA, Active-LODA, GLAD, AAA).
auc_curves_by_c = [
    (avg_LODA_0p5, avg_active_LODA_0p5, avg_GLAD_0p5, avg_AAA_0p5),
    (avg_LODA_1, avg_active_LODA_1, avg_GLAD_1, avg_AAA_1),
    (avg_LODA_1p5, avg_active_LODA_1p5, avg_GLAD_1p5, avg_AAA_1p5),
    (avg_LODA_2, avg_active_LODA_2, avg_GLAD_2, avg_AAA_2),
]

# Averaged cumulative labeled-anomaly counts, same layout as auc_curves_by_c.
nY1_curves_by_c = [
    (avg_nY1_LODA_0p5, avg_nY1_active_LODA_0p5, avg_nY1_GLAD_0p5, avg_nY1_AAA_0p5),
    (avg_nY1_LODA_1, avg_nY1_active_LODA_1, avg_nY1_GLAD_1, avg_nY1_AAA_1),
    (avg_nY1_LODA_1p5, avg_nY1_active_LODA_1p5, avg_nY1_GLAD_1p5, avg_nY1_AAA_1p5),
    (avg_nY1_LODA_2, avg_nY1_active_LODA_2, avg_nY1_GLAD_2, avg_nY1_AAA_2),
]

method_labels = ("LODA", "Active-LODA", "GLAD", "AAA")
nY1_linestyles = ('-', ':', '-.', '-')

# Create the subplot grid (4 rows, 3 columns); wider figure for 3 columns
fig, axes = plt.subplots(4, 3, figsize=(12, 16))
axes = axes.reshape(4, 3)  # for easier row-wise access

for i, c in enumerate(c_values):
    # Columns of this row: density plot, AUC curves, cumulative anomalies
    ax1, ax2, ax3 = axes[i, 0], axes[i, 1], axes[i, 2]

    # === COLUMN 1: Density Plots ===
    # Draw a fresh sample from the nominal / anomaly mixture for this value of c.
    a_list = [[c, c]]
    anomaly_cov_list = [[[0.1, 0], [0, 0.1]]]
    nominal_mean = np.array([0, 0])
    nominal_cov = np.array([[1, 0], [0, 1]])
    L = len(nominal_mean)

    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        tau=tau,
        a_list=a_list,
        anomaly_cov_list=anomaly_cov_list,
        nominal_mean=nominal_mean,
        nominal_cov=nominal_cov,
        L=L
    )

    nominal_points = X[Y == 0]
    anomaly_points = X[Y == 1]
    sns.kdeplot(x=nominal_points[:, 0], y=nominal_points[:, 1], cmap="Blues", fill=True, alpha=0.6, ax=ax1)
    sns.kdeplot(x=anomaly_points[:, 0], y=anomaly_points[:, 1], cmap="Reds", fill=True, alpha=0.6, ax=ax1)
    ax1.set_title(f"Density plot (c = {c})", fontsize=17)
    ax1.set_xlabel("X-axis", fontsize=15)
    ax1.set_ylabel("Y-axis", fontsize=15)
    ax1.grid(True)
    # Hand-built legend entries: plain coloured lines stand in for the two filled densities
    density_legend_handles = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies")
    ]
    ax1.legend(handles=density_legend_handles, loc='upper right', fontsize=13)

    # === COLUMN 2: AUC Plots ===
    # Active-LODA is plotted against the first int(n_loops/2) batches only.
    xx = list(range(1, n_loops + 1))
    xxactiveLODA = list(range(1,int(n_loops/2)+1))
    batch_axes = (xx, xxactiveLODA, xx, xx)

    for batches, auc_curve, method_label in zip(batch_axes, auc_curves_by_c[i], method_labels):
        ax2.plot(batches, auc_curve, label=method_label, linewidth=1.5)
    ax2.set_xlabel("Batch", fontsize=15)
    ax2.set_ylabel("Average AUC", fontsize=15)
    ax2.grid(True)
    ax2.legend(loc="best", fontsize=13)
    ax2.set_title("AUC over time", fontsize=17)

    # === COLUMN 3: Cumulative Anomalies Detected ===
    for batches, nY1_curve, method_label, line_style in zip(batch_axes, nY1_curves_by_c[i], method_labels, nY1_linestyles):
        ax3.plot(batches, nY1_curve, label=method_label, linewidth=2, linestyle=line_style)
    ax3.set_xlabel("Batch",fontsize=15)
    ax3.set_ylabel("Anomalies detected",fontsize=15)
    ax3.set_title("Cumul. anomalies detected",fontsize=17)
    ax3.grid(True, linestyle='--', alpha=0.6)
    ax3.legend(fontsize=13)

plt.tight_layout(h_pad=3.0)

# Save the plot:
fig.savefig("2d_Gaussian_Nominals_and_Anomalies2.pdf", format="pdf", bbox_inches="tight")


# Ten-dimensional Gaussian nominals with one ten-dimensional Gaussian anomaly.

In our simulations the nominal data are drawn from $N((0,\ldots,0),I_{10})$ and the anomaly data from $N((c,\ldots,c),0.1\,I_{10})$, for three choices of $c$.

Note that the random seed is the same across these three trials, but differs from the random seed used for all of the previous trials. This is because the previous seed causes active-LODA's optimization scheme to crash on one of the trials. More broadly, active-LODA generally struggles to converge once there are more than one hundred or so anomalies, which is the case in all of the trials here. The cause is the very large number of pairwise constraints it is trying to satisfy: for example, with 20 labeled anomalies and 100 labeled nominals there are $20 \times 100 = 2000$ pairwise constraints.

## 10-dimensional $c=0.5$

### Our method

In [None]:
# Seed NumPy's global random number generator so that subsequent NumPy random draws
# are repeatable from run to run.
np.random.seed(12345678)

In [None]:
############################################################################
# Ten-dimensional Gaussian nominals with one ten-dimensional Gaussian anomaly 
############################################################################

# Offset of the anomaly mean from the nominal mean in every coordinate:
c = 0.5

#Set n:
num_dim = 10

# Per-trial results, one row per trial and one column per batch:
all_AAA_AUC = np.zeros((n_trials,n_loops))
all_nY1_AAA = np.zeros((n_trials, n_loops))  # Array to store nY1 values


def scaled_identity_matrix(num_dim,c_m):
    """Return c_m times the num_dim x num_dim identity matrix, as a nested list."""
    return [[c_m if i == j else 0 for j in range(num_dim)] for i in range(num_dim)]


for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 
    
    # Specific arguments:
    a_list = [[c]*num_dim]
    
    anomaly_cov_list = [scaled_identity_matrix(num_dim,.1)]
    
    nominal_mean = np.array([0]*num_dim)    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array(scaled_identity_matrix(num_dim,1))   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Masking the old data, if it exists:
    if n_old == 0:
        #print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        #Re-draw the random mask while the labeled-anomaly count differs from 2 and the
        #labeled-nominal count differs from 98, i.e. stop as soon as either target is met.
        #NOTE(review): the two targets only coincide if exactly 100 labels are shown - confirm u.
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the points whose labels get hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set
    #(one column per model in `models`):
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
    
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # Our trials: 
    
    #Choose a supervised method that will be applied:
    supervised_method = 'LogisticRegression'

    #Initialization

    #Dealing with edge cases:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods
    
    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)


    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 0.4,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=tau,
        )
        
        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1
        
        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model was learned in this batch, record the chance-level value 0.5 instead;
        #new_preds then keeps the predictions from the last batch that did have a model.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            curr_AAA_AUC = roc_auc_score(Y_AUC,new_preds)
        else:
            curr_AAA_AUC = 0.5
        AAA_AUC[r] = curr_AAA_AUC
        all_AAA_AUC[curr_trial,r] = curr_AAA_AUC
        
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Calculate column averages for each array (average over trials, one value per batch)
avg_AAA_0p5_10d = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_0p5_10d = np.mean(all_nY1_AAA, axis=0)

#Save these results:
np.savez("AUC_0p5_10d_onlyAAA.npz",
         avg_AAA_0p5_10d = avg_AAA_0p5_10d,
         avg_nY1_AAA_0p5_10d = avg_nY1_AAA_0p5_10d,
        )

# Save also the score matrices for later (for the plot below)
np.savez("scores_0p5_10d_onlyAAA.npz",
         new_preds_0p5_10d = new_preds
        )

### The other methods

In [None]:
# Seed NumPy's global random number generator (same seed value as for the AAA run of
# this experiment) so that subsequent NumPy random draws are repeatable.
np.random.seed(12345678)

In [None]:
############################################################################
# Ten-dimensional Gaussian nominals with one ten-dimensional Gaussian anomaly 
############################################################################

# Offset of the anomaly mean from the nominal mean in every coordinate:
c = 0.5

# Dimension of the data:
num_dim = 10

# Result matrices have one row per trial. LODA and GLAD get one column per batch,
# whereas the active-LODA matrices only get int(n_loops/2) columns.
_per_batch_shape = (n_trials, n_loops)
_active_LODA_shape = (n_trials, int(n_loops/2))

# AUC on the external validation set:
all_LODA_AUC = np.zeros(_per_batch_shape)
all_active_LODA_AUC = np.zeros(_active_LODA_shape)
all_GLAD_AUC = np.zeros(_per_batch_shape)

# nY1 values (number of labeled anomalies):
all_nY1_LODA = np.zeros(_per_batch_shape)
all_nY1_active_LODA = np.zeros(_active_LODA_shape)
all_nY1_GLAD = np.zeros(_per_batch_shape)

# One trial per iteration: draw a fresh data stream and an external validation
# set, hide most of the initial labels, then run the methods on that data.
for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 
    
    def scaled_identity_matrix(num_dim,c_m):
        """Return c_m times the num_dim x num_dim identity matrix as a nested list."""
        return [[c_m if i == j else 0 for j in range(num_dim)] for i in range(num_dim)]
    
    # Specific arguments:
    # A single anomaly mean, equal to c in every coordinate.
    a_list = [[c]*num_dim]
    
    # One anomaly covariance matrix: 0.1 times the identity.
    anomaly_cov_list = [scaled_identity_matrix(num_dim,.1)]
    
    nominal_mean = np.array([0]*num_dim)    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array(scaled_identity_matrix(num_dim,1))   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    # (requested as 5000 "initial" points with no batches).
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Masking the old data, if it exists:
    # Without initial data the vector of (masked) initial labels is empty.
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        # Start values chosen so that the while-condition below holds on entry.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the random mask until the visible labels reach the hard-coded
        # target of 2 anomalies / 98 nominals.
        # NOTE(review): because of the `&`, the loop stops as soon as EITHER count
        # matches its target. That coincides with "both match" only when exactly
        # 100 labels stay visible -- confirm against the value of u, which is
        # defined outside this section.
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the points whose label gets hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            # Float copy of the initial labels in which NaN marks a hidden label.
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this
    #(or all n_old+B*n_loops of them if that is fewer).
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    #one row per validation point, one column per detector in `models`.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
    
        #Each detector is fitted on X_AUC itself right before scoring X_AUC.
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        #Assigning to .dtype reinterprets the existing buffer in place; it is not a cast.
        #Presumably a no-op because the scores are already float64 -- TODO confirm.
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS
    #
    # Plain LODA: the final score of a point is the unweighted mean of its scores
    # over the best_m detectors. The expert's labels are only counted here; they
    # never feed back into the scores.
    
    curr_L_index = n_old
    top_k = n_send
        
    #Validation scores of plain LODA (mean over detectors):
    weighted_scores = np.mean(new_unweighted_validation_scores,axis=1)

    #Neither weighted_scores nor Y_AUC changes while looping over the batches, so
    #the validation AUC and ROC curve are computed once per trial (the original
    #recomputed them three times in every batch) and the constant AUC is written
    #to every batch column.
    LODA_validation_AUC = roc_auc_score(Y_AUC,weighted_scores)
    LODA_AUC = [LODA_validation_AUC]*n_loops
    all_LODA_AUC[curr_trial,:] = LODA_validation_AUC

    fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
    roc_auc = auc(fpr, tpr)

    #Labels that are visible in the initial data (NaN in Y_muted means hidden):
    which_lab = [idx for idx, label in enumerate(Y_muted) if not np.isnan(label)]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):
        
        #Next batch of B points:
        curr_R_index = curr_L_index + B
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        curr_L_index = curr_R_index
        
        #Unweighted detector scores on the batch (each detector is re-fitted on it first):
        new_unweighted_scores = np.empty((B,best_m))
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            #In-place reinterpretation of the buffer, kept exactly as in the other sections.
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #Send the top_k highest-scoring points of the batch to the expert and
        #record how many labeled anomalies there are so far:
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
    
    #####################################################################################################################
    # ACTIVE-LODA trials:
    #
    # One weight vector over the best_m LODA detectors is re-optimized after every
    # batch from the unweighted scores of the labeled anomalies (H_A) and labeled
    # nominals (H_N), following Das et al. (2016).
    
    #We have to provide C_tau since it is needed before the optimization function.
    #C_tau = 0.03 is the default suggested in Das et al. (2016); here it is set to tau.
    C_tau = tau
    
    #The algorithm has further hyperparameters. For simplicity they are left at the
    #default values of the optimization function:
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send
    
    #The LODA projectors (a set of best_m anomaly detectors) chosen earlier in the
    #trial are re-used here.
    
    #Initialize the vector of weights as being equal and summing to 1:
    w_old = np.full(best_m, 1/best_m)
    
    #Arrays holding the unweighted scores of the labeled anomalies / nominals:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #fake_anomaly == 1 means that H_A currently holds a proxy ("fake") anomaly that
    #was inserted only because all labeled data so far are nominals.
    fake_anomaly = 0
    
    #No quantile threshold yet. Resetting it for every trial avoids silently re-using
    #the value left over from the previous trial (or hitting a NameError in the very
    #first one) when the initial data give no threshold.
    q_tau = None
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular it
    #matters whether there is initial "old" data and whether some of it is labeled
    #(Y_muted holds the visible labels, NaN marks a hidden one).
    n_init_lab_nom = int(np.sum(Y_muted == 0)) if n_old > 0 else 0
    n_init_lab_anom = int(np.sum(Y_muted == 1)) if n_old > 0 else 0
    
    #Unweighted scores of each LODA projector on the initial data. They are computed
    #whenever at least one initial label exists, so that BOTH branches below index
    #scores of the initial data. (Previously the "no labeled nominals" branch read
    #the array left over from the last LODA batch, whose rows do not correspond to
    #Y_muted.)
    if n_init_lab_nom > 0 or n_init_lab_anom > 0:
        X_new = X[:n_old,:]
        new_unweighted_scores = np.empty((n_old,best_m))
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
    
    #If there were initial data and at least one labeled nominal (following Das et al. (2016))
    if n_init_lab_nom > 0:
    
        #Linear combination of the scores weighted by w_old, sorted from smallest
        #to largest whilst retaining the indices:
        new_scores = np.matmul(new_unweighted_scores,w_old)
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]
    
        #Calculate q_tau on this initial data: 
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first step,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on 
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal. 
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]
    
        H_N = new_unweighted_scores[(Y_muted==0),:]
        if n_init_lab_anom == 0:
            #Das et al. (2016) allow for the case of labeled nominals only: the data
            #point whose score sits at the quantile position serves as a proxy anomaly.
            fake_anomaly = 1
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
        else:
            #There is at least one labeled anomaly:
            H_A = new_unweighted_scores[(Y_muted==1),:]
     
        #Update the linear combination coefficients and rescale them to unit L2 norm:
        w_new = optimize_w_2(H_A, H_N, q_tau)
        w_old = w_new / np.linalg.norm(w_new)
        
        #Diagnostics on the labeled data (not used further in this section):
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_new)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN
    
    #Else if there were initial data but no labeled nominals: Das et al. (2016)
    #provide no details for what to do here. It is possible though unlikely that
    #there is at least one labeled anomaly; H_A then has to be initialized even
    #though H_N stays empty. (Boolean-mask row selection always returns a 2-D
    #array, so no reshape is needed for a single anomaly.)
    elif n_init_lab_anom > 0:
        H_A = new_unweighted_scores[(Y_muted==1),:]
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    which_lab = [idx for idx, label in enumerate(Y_muted) if not np.isnan(label)]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    #Active-LODA is only run on the first int(n_loops/2) batches.
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = sum(Y_new==1)
        
        #Update for the next loop:
        curr_L_index = curr_L_index + B
        
        #Unweighted detector scores on the batch (each detector is re-fitted on it first):
        new_unweighted_scores = np.empty((B,best_m))
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Temporary final active-LODA scores are a linear combination over anomaly detectors:
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Weighted scores on the external validation set with the current value of
        #w_old, and the resulting AUC (computed once, stored in both places):
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = active_LODA_AUC[r]
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        #We have to attach the current versions of H_A and H_N to
        #new_unweighted_scores, since in this batch framework we do not have a
        #fixed number of data points from start to finish, like they do in
        #Das et al. (2016). If we do not do this, it will affect the calculation
        #of q_tau over time (a kind of bias will be introduced).
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Sort the scores from smallest to largest, whilst retaining the indices. The
        #rows appended from H_A and H_N have indices B, B+1, ... so they can be told
        #apart from the rows of the current batch.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)
        
        #If the initial data provided no threshold, fall back to this batch's quantile.
        if q_tau is None:
            q_tau = sorted_scores[my_quantile_sorted_index]
        
        #Following Das et al. (2016) the highest scoring data point goes to the
        #expert; slightly more generally we send the top_k highest scoring points.
        #Walk from the highest score downwards and keep the first top_k indices that
        #belong to the current batch (index < B).
        descending_indices = sorted_indices[::-1]
        top_k_indices = list(descending_indices[descending_indices < B][:top_k])

        #Now we go and get the labels in Y_new associated with these indices:
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1
        
        #If H_A only holds a proxy anomaly, throw the proxy away. The flag is
        #cleared as soon as the expert confirmed at least one real anomaly.
        if fake_anomaly==1:
            H_A = np.empty((0, best_m))
            if sum(Y_expert==1) > 0:
                fake_anomaly = 0
                
        #Append the unweighted scores of the newly labeled points to H_A (label 1)
        #or to H_N (any other label), keeping the order in which they were selected:
        expert_unweighted_scores = new_unweighted_scores[top_k_indices,:]
        H_A = np.concatenate([H_A,expert_unweighted_scores[Y_expert==1]])
        H_N = np.concatenate([H_N,expert_unweighted_scores[Y_expert!=1]])
                
        #It remains possible that all of the labeled data so far are nominals.
        if np.shape(H_A)[0]==0:
            #Use the data point whose score sits at the quantile position as a proxy
            #anomaly, and raise the flag so that the proxy is replaced in the next
            #batch instead of staying in H_A as if it were a real anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            fake_anomaly = 1
        
        #Update the linear combination coefficients. Note that this uses the
        #threshold from BEFORE this batch; q_tau is refreshed right afterwards.
        w_new = optimize_w_2(H_A, H_N, q_tau)
      
        #Update q_tau on this data:
        q_tau = sorted_scores[my_quantile_sorted_index]
        
        #Update w_old, rescaled to unit L2 norm:
        w_old = w_new / np.linalg.norm(w_new)
        
        #Diagnostics on the labeled data (not used further in this section):
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN

    #####################################################################################################################
    # GLAD trials:
    #
    # A neural network maps each data point to one weight per LODA detector; the
    # final score of a point is the weighted sum of its unweighted detector scores.
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    #Sentinel meaning "no quantile has been computed yet" (tested in the batch loop):
    q_tau_tm1 = -10e7
    
    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    # Train the model towards the constant target b for every detector weight.
    # We use the same n_min used earlier to find the number of LODA projections.
    # The target gets as many rows as the slice really has, so the shapes also
    # agree when X holds fewer than n_min rows.
    X_init = X[:n_min,:]
    y_true = np.full((np.shape(X_init)[0], best_m), b)
    model_GLAD.fit(X_init, y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    if n_old == 0:
        #No initial data: everything starts empty.
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])
    
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [idx for idx, label in enumerate(Y_old) if not np.isnan(label)]
    
        #Score ALL initial points, whether or not any of them is labeled: the rows
        #of all_unweighted_scores must stay aligned with the rows of X_so_far below,
        #which always include the initial data. (Previously the array started empty
        #when no initial label was visible, which made the quantile computation of
        #the first batch index past its end.)
        all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_old)
            y_score = model.score_samples(X_old)
            y_score.dtype = np.float64
            all_unweighted_scores[:,i] = y_score.squeeze()
    
        #Labeled subset; these are empty (0-row) arrays / an empty list when
        #which_lab is empty.
        X_lab = X_old[which_lab,:]
        Y_lab = [Y_old[j] for j in which_lab]
        all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new (each detector is re-fitted on it first):
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            all_scores[:,i] = y_score.squeeze()
    
        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score = row-wise weighted sum of the detector scores:
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)
    
        # Sort these scores and send the top_k highest-scoring points to the expert:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        n_so_far = np.shape(X_so_far)[0]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        # Scores on the external validation set with the current network weights.
        # Same row-wise weighted sum as for the batch, written in vectorized form.
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores * curr_w_ext, axis=1)
        
        # AUC computed once and stored in both places:
        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = GLAD_AUC[r]
    
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        #Check if this is the first time through:
        if q_tau_tm1 == -10e7:
            #No threshold yet: take the quantile of the final scores of all data so
            #far under the network as it is BEFORE this batch's training step.
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.sum(all_unweighted_scores[:n_so_far,:] * all_weights_so_far, axis=1)
                
            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])

        # Re-compile with the label-aware loss. The lambda ignores its own
        # (y_true, y_pred) arguments and passes the arrays built above instead.
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)
    
        #Refresh q_tau_tm1 with the network as it is AFTER training:
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.sum(all_unweighted_scores[:n_so_far,:] * all_weights_so_far, axis=1)

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
        
    
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average over trials (axis 0) to get one value per batch and per method,
# first for the validation AUC ...
avg_LODA_0p5_10d, avg_active_LODA_0p5_10d, avg_GLAD_0p5_10d = (
    np.mean(per_trial_auc, axis=0)
    for per_trial_auc in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

# ... and then for the running count of labeled anomalies.
avg_nY1_LODA_0p5_10d, avg_nY1_active_LODA_0p5_10d, avg_nY1_GLAD_0p5_10d = (
    np.mean(per_trial_count, axis=0)
    for per_trial_count in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)

# Persist the averaged curves; the archive keys equal the variable names.
np.savez(
    "AUC_0p5_10d.npz",
    **{
        "avg_LODA_0p5_10d": avg_LODA_0p5_10d,
        "avg_active_LODA_0p5_10d": avg_active_LODA_0p5_10d,
        "avg_GLAD_0p5_10d": avg_GLAD_0p5_10d,
        "avg_nY1_LODA_0p5_10d": avg_nY1_LODA_0p5_10d,
        "avg_nY1_active_LODA_0p5_10d": avg_nY1_active_LODA_0p5_10d,
        "avg_nY1_GLAD_0p5_10d": avg_nY1_GLAD_0p5_10d,
    },
)

# Persist the most recently computed validation score vectors of the three
# methods as well (kept for the plot further down in the notebook).
np.savez(
    "scores_0p5_10d.npz",
    **{
        "weighted_scores_0p5_10d": weighted_scores,
        "weighted_validation_scores_0p5_10d": weighted_validation_scores,
        "X_new_final_scores_ext_0p5_10d": X_new_final_scores_ext,
    },
)


### Bring back saved variables

In [None]:
# Ours: averaged AUC curve and labeled-anomaly counts of AAA.
data_0p5_10d_onlyAAA = np.load("AUC_0p5_10d_onlyAAA.npz")
avg_AAA_0p5_10d, avg_nY1_AAA_0p5_10d = (
    data_0p5_10d_onlyAAA[key]
    for key in ("avg_AAA_0p5_10d", "avg_nY1_AAA_0p5_10d")
)


# The others: same quantities for LODA, active-LODA and GLAD.
data_0p5_10d = np.load("AUC_0p5_10d.npz")
avg_LODA_0p5_10d, avg_active_LODA_0p5_10d, avg_GLAD_0p5_10d = (
    data_0p5_10d[key]
    for key in ("avg_LODA_0p5_10d", "avg_active_LODA_0p5_10d", "avg_GLAD_0p5_10d")
)
avg_nY1_LODA_0p5_10d, avg_nY1_active_LODA_0p5_10d, avg_nY1_GLAD_0p5_10d = (
    data_0p5_10d[key]
    for key in (
        "avg_nY1_LODA_0p5_10d",
        "avg_nY1_active_LODA_0p5_10d",
        "avg_nY1_GLAD_0p5_10d",
    )
)

### Plot AUC

In [None]:
# Batch numbers on the x-axis; active-LODA only covers int(n_loops/2) batches.
xx = [*range(1, n_loops+1)]
xxactiveLODA = [*range(1, int(n_loops/2)+1)]

# One curve of averaged AUC values per method, drawn in this order.
plt.figure(figsize=(10, 6))
for _batches, _avg_auc, _method, _marker in (
    (xx, avg_LODA_0p5_10d, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_0p5_10d, "Active-LODA", 's'),
    (xx, avg_GLAD_0p5_10d, "GLAD", '^'),
    (xx, avg_AAA_0p5_10d, "AAA", 'd'),
):
    plt.plot(_batches, _avg_auc, label=_method, marker=_marker)

# Axis labels, one tick per batch, legend and grid.
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

### Cumulative number of detected anomalies

In [None]:
# Batch numbers on the x-axis; active-LODA only covers int(n_loops/2) batches.
xx = [*range(1, n_loops+1)]
xxactiveLODA = [*range(1, int(n_loops/2)+1)]

# One curve per method: averaged number of labeled anomalies after each batch.
for _batches, _avg_count, _method, _marker in (
    (xx, avg_nY1_LODA_0p5_10d, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_0p5_10d, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_0p5_10d, "GLAD", '^'),
    (xx, avg_nY1_AAA_0p5_10d, "AAA", 'd'),
):
    plt.plot(_batches, _avg_count, label=_method, marker=_marker)

# Axis labels, title, legend and a light dashed grid.
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## 10-dimensional $c=1$

### Our method

In [None]:
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(12345678)

In [None]:
############################################################################
# Ten-dimensional Gaussian nominals with one ten-dimensional Gaussian anomaly 
############################################################################
# For every trial this cell (1) samples a data stream and a separate validation
# set, (2) masks part of the labels of the initial data, (3) chooses the LODA
# projections, and (4) runs the AAA loop batch by batch, recording the AUC on
# the validation set and the cumulative number of labeled anomalies.

c = 1  # every coordinate of the anomaly mean is set to c (see a_list below)

# Dimension of the data:
num_dim = 10

# One row per trial, one column per batch:
all_AAA_AUC = np.zeros((n_trials,n_loops))    # AUC on the validation set
all_nY1_AAA = np.zeros((n_trials, n_loops))   # cumulative number of labeled anomalies

# Validation-set predictions of the most recent fitted model. Reset here so that
# a value left over from an earlier notebook cell can never be saved by mistake.
new_preds = None


def scaled_identity_matrix(num_dim,c_m):
    """Return c_m times the num_dim x num_dim identity matrix as a list of lists."""
    return [[c_m if i == j else 0 for j in range(num_dim)] for i in range(num_dim)]


for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 
    
    # Specific arguments:
    a_list = [[c]*num_dim]                                      # One anomaly mean: (c, ..., c)
    anomaly_cov_list = [scaled_identity_matrix(num_dim,.1)]     # Anomaly covariance: 0.1 * identity
    nominal_mean = np.array([0]*num_dim)                        # Mean of the nominal Gaussian distribution
    nominal_cov = np.array(scaled_identity_matrix(num_dim,1))   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        # These quantities do not depend on the random mask, so compute them once.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # Number of initial labels to hide. The ceiling rounds the number hidden UP,
        # so the number of labels shown is rounded down.
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100
        
        # Redraw the mask until 2 labeled anomalies OR 98 labeled nominals remain
        # (the loop only continues while BOTH counts miss their target).
        # NOTE(review): the two targets coincide only when exactly 100 labels are
        # shown; confirm this matches the chosen u and n_old.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the labels to hide:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
        
        #Rows of X corresponding to the anomalies whose label is still visible:
        X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set
    #(one column per LODA model; each model is re-fitted on X_AUC before scoring):
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, model in enumerate(models.values()):
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        # Reinterprets the buffer as float64 in place (no value conversion);
        # presumably a no-op because the scores are already float64 -- TODO confirm.
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # Our trials: 
    
    #Choose a supervised method that will be applied:
    supervised_method = 'LogisticRegression'

    #Initialization
    #Dealing with edge cases:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods
    
    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)

    #Left index (into X) of the next batch:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores for this trial
    AAA_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 0.4,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=tau,
        )
        
        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1
        
        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model was returned, record the chance-level AUC of 0.5.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            batch_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            batch_auc = 0.5
        AAA_AUC[r] = batch_auc
        all_AAA_AUC[curr_trial,r] = batch_auc
        
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Calculate column averages (over trials) for each array
avg_AAA_1_10d = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_1_10d = np.mean(all_nY1_AAA, axis=0)

#Save these results:
np.savez("AUC_1_10d_onlyAAA.npz",
         avg_AAA_1_10d = avg_AAA_1_10d,
         avg_nY1_AAA_1_10d = avg_nY1_AAA_1_10d,
        )

# Save also the score matrices for later (for the plot below).
# new_preds holds the predictions of the most recent fitted model; fail loudly
# if no model was ever fitted in this cell instead of saving unrelated scores.
if new_preds is None:
    raise RuntimeError("No learned model was produced in this cell, so there are no validation predictions to save.")
np.savez("scores_1_10d_onlyAAA.npz",
         new_preds_1_10d = new_preds
        )

### The other methods

In [None]:
# Reset NumPy's global random seed (same value as for the "Our method" cell) so
# that the np.random calls in the next cell repeat from run to run.
# NOTE(review): presumably this makes both cells see the same sampled data --
# confirm that sample_data draws from NumPy's global RNG.
np.random.seed(12345678)

In [None]:
############################################################################
# Ten-dimensional Gaussian nominals with one ten-dimensional Gaussian anomaly 
############################################################################
# For every trial this cell samples a data stream and a separate validation set,
# masks part of the labels of the initial data, chooses the LODA projections and
# then runs, one after the other on the same data: plain LODA, Active-LODA and
# GLAD. For each method it records the AUC on the validation set and the
# cumulative number of labeled anomalies after every batch.

c = 1  # every coordinate of the anomaly mean is set to c (see a_list below)

# Dimension of the data:
num_dim = 10

# One row per trial, one column per batch (Active-LODA is only run for the
# first int(n_loops/2) batches):
all_LODA_AUC = np.zeros((n_trials,n_loops))
all_active_LODA_AUC = np.zeros((n_trials,int(n_loops/2)))
all_GLAD_AUC = np.zeros((n_trials,n_loops))

all_nY1_LODA = np.zeros((n_trials, n_loops))  # Array to store nY1 values
all_nY1_active_LODA = np.zeros((n_trials, int(n_loops/2)))  # Array to store nY1 values
all_nY1_GLAD = np.zeros((n_trials, n_loops))  # Array to store nY1 values


def scaled_identity_matrix(num_dim,c_m):
    """Return c_m times the num_dim x num_dim identity matrix as a list of lists."""
    return [[c_m if i == j else 0 for j in range(num_dim)] for i in range(num_dim)]


def _tau_quantile_index(n_items, c_tau):
    """Return the position of the (1 - c_tau) quantile among n_items ascending scores."""
    return int(np.floor(n_items*(1-c_tau)))-1*(c_tau != 1)


def _unweighted_scores(models, data, n_models):
    """Re-fit every model on data and return its scores, one column per model."""
    score_matrix = np.empty((np.shape(data)[0], n_models))
    for col, model in enumerate(models.values()):
        model.fit(data)
        y_score = model.score_samples(data)
        # Reinterprets the buffer as float64 in place (no value conversion);
        # presumably a no-op because the scores are already float64 -- TODO confirm.
        y_score.dtype = np.float64
        score_matrix[:, col] = y_score.squeeze()
    return score_matrix


for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Specific arguments:
    a_list = [[c]*num_dim]                                      # One anomaly mean: (c, ..., c)
    anomaly_cov_list = [scaled_identity_matrix(num_dim,.1)]     # Anomaly covariance: 0.1 * identity
    nominal_mean = np.array([0]*num_dim)                        # Mean of the nominal Gaussian distribution
    nominal_cov = np.array(scaled_identity_matrix(num_dim,1))   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        # These quantities do not depend on the random mask, so compute them once.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # Number of initial labels to hide. The ceiling rounds the number hidden UP,
        # so the number of labels shown is rounded down.
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100
        
        # Redraw the mask until 2 labeled anomalies OR 98 labeled nominals remain
        # (the loop only continues while BOTH counts miss their target).
        # NOTE(review): the two targets coincide only when exactly 100 labels are
        # shown; confirm this matches the chosen u and n_old.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the labels to hide:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
        
        #Rows of X corresponding to the anomalies whose label is still visible:
        X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    new_unweighted_validation_scores = _unweighted_scores(models, X_AUC, best_m)
    
    #####################################################################################################################
    # LODA TRIALS
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
        
    #Plain LODA never changes its weights, so its validation score is simply the
    #average over the detectors and is the same for every batch.
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    #Start from the labels that are visible in the initial data:
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = _unweighted_scores(models, X_new, best_m)
            
        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Send the top_k highest-scoring points of the batch to the expert:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################
        
        #############################################################
        # AUC on the external validation set:
        loda_auc = roc_auc_score(Y_AUC,weighted_scores)
        LODA_AUC[r] = loda_auc
        all_LODA_AUC[curr_trial,r] = loda_auc
    
    #####################################################################################################################
    # ACTIVE-LODA trials:
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau
    
    #There are hyperparameters that need to be set in advance for this algorithm. However, 
    #for simplicity we assume they take the default values in the function optimize_w.
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send

    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.full(best_m, 1/best_m)
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #fake_anomaly == 1 means that H_A currently holds a placeholder ("fake") anomaly
    #rather than a truly labeled one, because only nominals have been labeled so far.
    fake_anomaly = 0
    
    #q_tau is not known until scores have been computed at least once. Reset it per
    #trial so that a value from a previous trial or cell is never reused silently.
    q_tau = None
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    if n_old > 0:
        
        #Extract the initial data from X and calculate the unweighted scores of each LODA
        #projector on it. This is needed by both branches below.
        X_new = X[:n_old,:]
        new_unweighted_scores = _unweighted_scores(models, X_new, best_m)
        
        #If there is at least one labeled nominal (following Das et al. (2016))
        if np.sum(Y_muted == 0) > 0:
            
            #Calculate the sum of the linear combination of these scores weighted by w_old:
            new_scores = np.matmul(new_unweighted_scores,w_old)
        
            #Sort new_scores from smallest to largest, whilst retaining the indices.
            sorted_indices = np.argsort(new_scores)
            sorted_scores = new_scores[sorted_indices]
        
            #Calculate q_tau on this initial data: 
            #WARNING: one of the underlying problems with active-LODA is that it basically expects
            #anomalies to have the highest scores from the get go. But here, in this first loop,
            #it very well could be that the anomalies have all the LOWEST scores. The calculation
            #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
            #expects positive weights, especially in its minimization step, with its L2 norm penalty
            #on the weights. This means that active-LODA does not reach its true potential, as
            #defined and coded by Das et al. (2016). The trouble is that without a penalty on 
            #making the weights not too big, not too small, and not necessarily positive, it will
            #remain suboptimal. 
            my_quantile_sorted_index = _tau_quantile_index(n_old, C_tau)
            q_tau = sorted_scores[my_quantile_sorted_index]
        
            #Create the arrays H_A and H_N:
            H_N = new_unweighted_scores[(Y_muted==0),:]
            if np.sum(Y_muted == 1) == 0:
                #In Das et al. (2016) they allow for the case that there are no labeled
                #anomalies, only labeled nominals. We then take the data point whose score
                #sits at the quantile position as a proxy for an anomaly.
                fake_anomaly = 1
                H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            else:
                #There is at least one labeled anomaly:
                H_A = new_unweighted_scores[(Y_muted==1),:]
            
            #Update the linear combination coefficients and rescale them to unit L2 norm:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            w_old = w_new
            w_old = w_old / np.linalg.norm(w_old)  
        
        #Else there are no labeled nominals: Das et al. (2016) provide no details for what
        #to do here. It is possible though unlikely that there is nevertheless at least
        #one labeled anomaly, in which case H_A must be initialized even though H_N is empty.
        elif np.sum(Y_muted == 1) > 0:
            #Selecting rows with a boolean mask keeps the result two-dimensional, even
            #when a single anomaly is selected.
            H_A = new_unweighted_scores[(Y_muted==1),:]
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = sum(Y_new==1)
        
        #Update for the next loop:
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = _unweighted_scores(models, X_new, best_m)
            
        #Calculate the weighted scores on the external validation set with the current
        #value of w_old, and the corresponding AUC:
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        active_auc = roc_auc_score(Y_AUC,weighted_validation_scores)
        active_LODA_AUC[r] = active_auc
        all_active_LODA_AUC[curr_trial,r] = active_auc

        #Append the scores of everything labeled so far; the batch keeps rows 0..B-1.
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        #Following the methodology in Das et al. (2016), we should provide the 
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we shall instead provide the top_k scoring data
        #points to the expert, where top_k has been pre-defined.
        
        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = _tau_quantile_index(len(new_active_LODA_scores), C_tau)
        
        #Walk from the highest score downwards and keep the first top_k indices that
        #belong to the current batch (index < B); larger indices are previously
        #labeled items that were appended above.
        top_k_indices = [idx for idx in sorted_indices[::-1] if idx < B][:top_k]

        #Now we go and get the labels in Y_new associated with these indices:
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1
        
        #If H_A only holds a placeholder anomaly, drop it. If the expert has just
        #confirmed a real anomaly, the placeholder is no longer needed; otherwise a
        #new placeholder is chosen further down.
        if fake_anomaly == 1:
            H_A = np.empty((0, best_m))
            if sum(Y_expert==1) > 0:
                fake_anomaly = 0
                
        #We then need to append the relevant unweighted scores to the current H_A and H_N
        for idx, expert_label in zip(top_k_indices, Y_expert):
            labeled_row = new_unweighted_scores[idx:(idx+1),:]
            if expert_label == 1:
                H_A = np.concatenate([H_A,labeled_row])
            else:
                H_N = np.concatenate([H_N,labeled_row])
                
        #We always have to retain the possibility that all of the labeled data so far
        #are nominals. 
        if np.shape(H_A)[0]==0:
            #We take the data point whose score sits at the quantile position as a proxy
            #for an anomaly, and remember that it is only a placeholder so that it is
            #replaced on the next batch.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            fake_anomaly = 1
        
        #If no q_tau is available yet (no usable initial data), use the quantile of the
        #current scores.
        if q_tau is None:
            q_tau = sorted_scores[my_quantile_sorted_index]
        
        #Update the linear combination coefficients.
        #NOTE(review): the q_tau passed here is the one from the previous step; it is
        #only refreshed after the optimization. Kept as in the original experiments.
        w_new = optimize_w_2(H_A, H_N, q_tau)
    
        #Update q_tau on this data:
        q_tau = sorted_scores[my_quantile_sorted_index]
        
        #update w_old (rescaled to unit L2 norm)
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        
        #Diagnostics: predictions of the new weights on all labeled data so far.
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN
    
    #####################################################################################################################
    # GLAD trials:
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    q_tau_tm1 = -10e7   #Sentinel meaning "no quantile has been computed yet"
    
    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    # Train the model so that every output is pushed towards b.
    # We use the same n_min used earlier to find the number of LODA projections, capped
    # at the number of rows actually available so that inputs and targets always match.
    X_init = X[:n_min,:]
    y_true = np.full((np.shape(X_init)[0], best_m), b)
    model_GLAD.fit(X_init, y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Dealing with edge cases:
    if n_old == 0:
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])
    
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]
    
        #The scores of ALL initial points are needed, labeled or not, because
        #all_unweighted_scores must stay aligned row by row with X_so_far below.
        all_unweighted_scores = _unweighted_scores(models, X_old, best_m)
    
        if len(which_lab) == 0:
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = [] 
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Left index (into X) of the next batch:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new:
        all_scores = _unweighted_scores(models, X_new, best_m)
    
        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score of a point = sum over detectors of (score * point-specific weight):
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)
    
        # Sort these scores:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        # Scores on the external validation set (same row-wise weighted sum as above):
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores * curr_w_ext, axis=1)
        
        glad_auc = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        GLAD_AUC[r] = glad_auc
        all_GLAD_AUC[curr_trial,r] = glad_auc
    
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        #Check if this is the first time through:
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1: quantile of the final scores of all data so far
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.sum(all_unweighted_scores[:np.shape(X_so_far)[0],:] * all_weights_so_far, axis=1)

            # Calculate the quantile index without fully sorting
            quantile_index = _tau_quantile_index(len(all_final_scores_so_far), C_tau)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])
        
        # Re-compile with the loss built from the current labeled data. The lambda
        # ignores its own arguments and passes the variables prepared above.
        model_GLAD.compile(
            optimizer='adam',
            loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b),
            metrics=['accuracy'],
        )
        
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)
    
        #Dealing with q_tau_tm1: refresh the quantile with the re-trained network
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.sum(all_unweighted_scores[:np.shape(X_so_far)[0],:] * all_weights_so_far, axis=1)

        # Calculate the quantile index without fully sorting
        quantile_index = _tau_quantile_index(len(all_final_scores_so_far), C_tau)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
    
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Calculate column averages (over trials) for each array
avg_LODA_1_10d = np.mean(all_LODA_AUC, axis=0)
avg_active_LODA_1_10d = np.mean(all_active_LODA_AUC, axis=0)
avg_GLAD_1_10d = np.mean(all_GLAD_AUC, axis=0)

avg_nY1_LODA_1_10d = np.mean(all_nY1_LODA, axis=0)
avg_nY1_active_LODA_1_10d = np.mean(all_nY1_active_LODA, axis=0)
avg_nY1_GLAD_1_10d = np.mean(all_nY1_GLAD, axis=0)

#Save these results:
np.savez("AUC_1_10d.npz",
         avg_LODA_1_10d = avg_LODA_1_10d,
         avg_active_LODA_1_10d = avg_active_LODA_1_10d,
         avg_GLAD_1_10d = avg_GLAD_1_10d,
         avg_nY1_LODA_1_10d = avg_nY1_LODA_1_10d,
         avg_nY1_active_LODA_1_10d = avg_nY1_active_LODA_1_10d,
         avg_nY1_GLAD_1_10d = avg_nY1_GLAD_1_10d,
        )

# Save also the score matrices for later (for the plot below). These are the
# validation-set scores from the last batch of the last trial.
np.savez("scores_1_10d.npz",
         weighted_scores_1_10d = weighted_scores,
         weighted_validation_scores_1_10d = weighted_validation_scores,
         X_new_final_scores_ext_1_10d = X_new_final_scores_ext,
        )

### Bring back saved variables

In [None]:
# AAA (our method): read back the two averaged curves from the AAA-only archive
data_1_10d_onlyAAA = np.load("AUC_1_10d_onlyAAA.npz")
avg_AAA_1_10d, avg_nY1_AAA_1_10d = (
    data_1_10d_onlyAAA[key] for key in ("avg_AAA_1_10d", "avg_nY1_AAA_1_10d")
)


# LODA, Active-LODA and GLAD: read back the six averaged curves from the shared archive
data_1_10d = np.load("AUC_1_10d.npz")
avg_LODA_1_10d, avg_active_LODA_1_10d, avg_GLAD_1_10d = (
    data_1_10d[key]
    for key in ("avg_LODA_1_10d", "avg_active_LODA_1_10d", "avg_GLAD_1_10d")
)
avg_nY1_LODA_1_10d, avg_nY1_active_LODA_1_10d, avg_nY1_GLAD_1_10d = (
    data_1_10d[key]
    for key in ("avg_nY1_LODA_1_10d", "avg_nY1_active_LODA_1_10d", "avg_nY1_GLAD_1_10d")
)

### Plot AUC

In [None]:
# Batch numbers on the x-axis; Active-LODA is plotted over the first half of the batches only
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops / 2) + 1))

plt.figure(figsize=(10, 6))

# One curve per method: (x values, averaged AUC, legend label, marker)
auc_curves = (
    (xx, avg_LODA_1_10d, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_1_10d, "Active-LODA", 's'),
    (xx, avg_GLAD_1_10d, "GLAD", '^'),
    (xx, avg_AAA_1_10d, "AAA", 'd'),
)
for batch_axis, auc_values, curve_label, curve_marker in auc_curves:
    plt.plot(batch_axis, auc_values, label=curve_label, marker=curve_marker)

# Axis labels (this figure has no title)
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)

# One tick per batch
plt.xticks(ticks=xx)

# Legend and grid
plt.legend(loc="best", fontsize=12)
plt.grid(True)

# Render the figure
plt.show()

### Plot cumulative number of anomalies detected

In [None]:
# Batch numbers on the x-axis; Active-LODA is plotted over the first half of the batches only
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops / 2) + 1))

# One curve per method: (x values, averaged number of labeled anomalies, legend label, marker)
detected_curves = (
    (xx, avg_nY1_LODA_1_10d, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_1_10d, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_1_10d, "GLAD", '^'),
    (xx, avg_nY1_AAA_1_10d, "AAA", 'd'),
)
for batch_axis, n_detected, curve_label, curve_marker in detected_curves:
    plt.plot(batch_axis, n_detected, label=curve_label, marker=curve_marker)

# Axis labels and title
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")

# Legend and a light dashed grid
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

# Render the figure
plt.show()

## 10-dimensional $c=1.5$

### Our method

In [None]:
# Seed NumPy's global random number generator for the trials in this section.
# The seed used everywhere else, np.random.seed(123456789), is not used here:
# with it, active-LODA crashes because the solver fails on the 5th loop.
# We therefore change the seed for these trials.

np.random.seed(12345678)

In [None]:
############################################################################
# Ten-dimensional Gaussian nominals with one ten-dimensional Gaussian anomaly 
############################################################################

def scaled_identity_matrix(num_dim,c_m):
    """Return a num_dim x num_dim nested list with c_m on the diagonal and 0 elsewhere."""
    return [[c_m if i == j else 0 for j in range(num_dim)] for i in range(num_dim)]


# Every coordinate of the anomaly mean is set to c (see a_list below):
c = 1.5

# Dimension of the data:
num_dim = 10

# One row per trial, one column per batch:
all_AAA_AUC = np.zeros((n_trials,n_loops))   # AUC on the external validation set
all_nY1_AAA = np.zeros((n_trials, n_loops))  # Number of labeled anomalies so far

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Specific arguments:
    a_list = [[c]*num_dim]
    
    anomaly_cov_list = [scaled_identity_matrix(num_dim,.1)]
    
    nominal_mean = np.array([0]*num_dim)    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array(scaled_identity_matrix(num_dim,1))   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        # Sentinel values that guarantee the masking loop runs at least once:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Re-draw the random mask until the visible labels hit the target counts.
        # NOTE(review): with "&" the loop stops as soon as EITHER count matches
        # (2 anomalies or 98 nominals). That coincides with "both match" only when
        # exactly 100 labels are left visible - confirm this is the intent for the u in use.
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide. The number hidden
            #is rounded up, so the number of labels left visible is rounded down:
            n_hide = int(np.ceil((1-u)*n_old))
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the indices whose labels get hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (hidden labels become NaN):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
        #Get the rows of X corresponding to the unmuted anomalies of the accepted mask:
        X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
        # NOTE(review): each detector is re-fit on the validation set before scoring it.
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        # Assigning to .dtype reinterprets the buffer in place (no value conversion);
        # presumably score_samples already returns float64 - TODO confirm.
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # Our trials: 
    
    #Choose a supervised method that will be applied:
    supervised_method = 'LogisticRegression'

    #Initialization

    #Dealing with edge cases:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods
    
    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)

    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 0.4,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=tau,
        )
        
        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1
        
        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model has been learned, record the chance-level value 0.5 instead.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            curr_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            curr_auc = 0.5
        AAA_AUC[r] = curr_auc
        all_AAA_AUC[curr_trial,r] = curr_auc
        
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Calculate column averages for each array
avg_AAA_1p5_10d = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_1p5_10d = np.mean(all_nY1_AAA, axis=0)

#Save these results:
np.savez("AUC_1p5_10d_onlyAAA.npz",
         avg_AAA_1p5_10d = avg_AAA_1p5_10d,
         avg_nY1_AAA_1p5_10d = avg_nY1_AAA_1p5_10d,
        )

# Save also the latest validation predictions for later (for the plot below).
# NOTE(review): new_preds is only assigned in batches where a model was learned, so it
# holds the most recent such predictions (and is undefined here if that never happened).
np.savez("scores_1p5_10d_onlyAAA.npz",
         new_preds_1p5_10d = new_preds
        )

### The other three methods

In [None]:
# Seed NumPy's global random number generator for the trials in this section.
# The seed used everywhere else, np.random.seed(123456789), is not used here:
# with it, active-LODA crashes because the solver fails on the 5th loop.
# We therefore change the seed for these trials.
np.random.seed(12345678)

In [None]:
############################################################################
# Ten-dimensional Gaussian nominals with one ten-dimensional Gaussian anomaly 
############################################################################

c = 1.5

# Dimension of the data:
num_dim = 10

# Result matrices have one row per trial. LODA and GLAD get one column per batch;
# Active-LODA gets int(n_loops/2) columns.
full_shape = (n_trials, n_loops)
half_shape = (n_trials, int(n_loops/2))

# AUC values:
all_LODA_AUC = np.zeros(full_shape)
all_active_LODA_AUC = np.zeros(half_shape)
all_GLAD_AUC = np.zeros(full_shape)

# nY1 values (number of labeled anomalies):
all_nY1_LODA = np.zeros(full_shape)
all_nY1_active_LODA = np.zeros(half_shape)
all_nY1_GLAD = np.zeros(full_shape)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    def scaled_identity_matrix(num_dim,c_m):
        return [[c_m if i == j else 0 for j in range(num_dim)] for i in range(num_dim)]
    
    # Specific arguments:
    a_list = [[c]*num_dim]
    
    anomaly_cov_list = [scaled_identity_matrix(num_dim,.1)]
    
    nominal_mean = np.array([0]*num_dim)    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array(scaled_identity_matrix(num_dim,1))   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same mixture distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the indices whose labels get hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
    
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
        
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,best_m))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Sort these scores:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################
        
        
        #############################################################
        LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_scores)
        all_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
    #####################################################################################################################
    # ACTIVE-LODA trials:
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau
    
    #There are hyperparameters that need to be set in advance for this algorithm. However, 
    #for simplicity we assume they take their default values in the function optimize_w_2.
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send

    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.array([1/best_m for i in range(best_m)])
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    
    #If there were initial data and at least one labeled nominal (following Das et al. (2016))
    if n_old > 0 and np.sum(Y_muted == 0) > 0:
            
        #Extract the initial data from X:
        X_new = X[:n_old,:]
    
        #Calculate the unweighted scores for each LODA projector on the initial data: 
        new_unweighted_scores = np.empty((n_old,best_m))
    
        for i, (name, model) in enumerate(models.items()):
    
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
    
        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)
    
        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]
    
        #Calculate q_tau on this initial data: 
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first loop,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that, without a penalty that
        #keeps the weights neither too big nor too small while not forcing them to be positive,
        #it will remain suboptimal. 
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]
    
        #Create the arrays H_A and H_N:
        #First we take H_A. In Das et al. (2016) they allow for the case that there
        #are no labeled anomalies, only labeled nominals.
        if np.sum(Y_muted == 1) == 0:
            #Set a "fake anomaly alert to 1":
            fake_anomaly = 1
            #We take the data point corresponding to the score sorted into the 
            #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            H_N = new_unweighted_scores[(Y_muted==0),:]
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
                    
        else:
            #There is at least one labeled anomaly:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            H_N = new_unweighted_scores[(Y_muted==0),:]
     
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            
            temp_unweighted = np.concatenate([H_A,H_N])
            temp_pred = np.matmul(temp_unweighted,w_new)
            temp_YA = [1]*np.shape(H_A)[0]
            temp_YN = [0]*np.shape(H_N)[0]
            temp_Y = temp_YA + temp_YN
            
        #We can now update w_old with the value of w_new. If this whole big loop wasn't run,
        #then w_old will stay at its original value.
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
    
    #Else if there were initial data but no labeled nominals: Das et al. (2016)
    #provide no details for what to do here. It is possible though unlikely that
    #in the initial data, there were no labeled nominals but there was at least
    #one labeled anomaly. We do have to check this, since we will need to initialize
    #H_A in this case, even if H_N is empty.
    elif n_old > 0 and np.sum(Y_muted == 0) == 0:
        if np.sum(Y_muted == 1) > 0:
            #initialize H_A:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            #Potential Python issue whereby if there is just one anomaly, weird
            #things happen with array shapes. To guard against this:
            if np.sum(Y_muted == 1) == 1:
                if np.shape(H_A) != (1,best_m):
                    H_A = H_A.reshape(1,-1)
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = sum(Y_new==1)
        
        #Update for the next loop:
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,best_m))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Temporary final active-LODA scores are a linear combination over anomaly detectors:
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        #Calculated the weighted scores on the external validation set with the current
        #value of w_old:
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
        
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        temp_Y = Y_new.tolist() + [1]*np.shape(H_A)[0] + [0]*np.shape(H_N)[0]
        
        #Following the methodology in Das et al. (2016), we should provide the 
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we shall instead provide the top_k scoring data
        #points to the expert, where top_k has been pre-defined.
        
        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #Remember that these may include items from the previous loop or from the initialization.
        #However, since we appended those on to the end, we know their indices will be B, B+1,...
        #so we will be able to look out for them.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)
        
        #We now have to go through the sorted_indices from the end back towards the 
        #beginning until we manage to gather top_k indices which are less than or equal to B-1.
        top_k_indices = []
        n_indices_so_far = 0
        curr_index = len(sorted_indices)
        while n_indices_so_far < top_k:
            curr_index = curr_index - 1
            next_top_index = sorted_indices[curr_index]
            if next_top_index < B:
                top_k_indices.append(next_top_index)
                n_indices_so_far = n_indices_so_far + 1
                
        #Now we go and get the labels in Y_new associated with these indices:
        #top_k_indices = np.array(top_k_indices)
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1
        
        #Now is a good time to check whether we just found at least one real anomaly while
        #up to now we only had one fake anomaly:
        if sum(Y_expert==1) > 0 and fake_anomaly==1:
            #We now reset H_A to be empty:
            H_A = np.empty((0, best_m))
            #And we set fake_anomaly to 0 forever:
            fake_anomaly = 0
            
        if sum(Y_expert==1) == 0 and fake_anomaly==1:
            #We reset H_A back to empty again:
            H_A = np.empty((0, best_m))
                
        #We then need to append the relevant unweighted scores to the current H_A and H_N
        for j in range(top_k):
            if Y_expert[j]==1:
                H_A = np.concatenate([H_A,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
            else:
                H_N = np.concatenate([H_N,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
                
        #Here we have to check whether this was the first time H_A actually contained
        #a real anomaly or not, rather than a temporary one (if there were no labeled anomalies)    
        
        #Now since top_k > 0 we know that there is a positive number of labeled data for sure.
        #We always have to retain however the possibility that all of the labeled data so far
        #are nominals. 
        if np.shape(H_A)[0]==0:
            #We take the data point corresponding to the score sorted into the 
            #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
        else:
            w_new = optimize_w_2(H_A, H_N, q_tau)
    
                
    
        #Update q_tau on this data:
        q_tau = sorted_scores[my_quantile_sorted_index]
        
        #update w_old
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN
    
    #####################################################################################################################
    # GLAD trials:
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    q_tau_tm1 = -10e7
    
    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    # Train the model
    # We use the same n_min used earlier to find the number of LODA projections:
    y_true = np.full((n_min, best_m), b)
    model_GLAD.fit(X[:n_min,:], y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Dealing with edge cases:
    if n_old == 0:
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])
    
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]
    
        if len(which_lab) == 0:
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = [] 
            all_unweighted_scores = np.empty([0,best_m])
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_old)
                y_score = model.score_samples(X_old)
                y_score.dtype = np.float64
                all_unweighted_scores[:,i] = y_score.squeeze()
    
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new:
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            all_scores[:,i] = y_score.squeeze()
    
        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)
    
        # Sort these scores:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.zeros((np.shape(X_AUC)[0],))
        for k in range(np.shape(X_AUC)[0]):
            X_new_final_scores_ext[k] = np.matmul(new_unweighted_validation_scores[k,:],np.transpose(curr_w_ext[k,:]))
        
        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
    
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        #Check if this is the first time through:
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
            for k in range(np.shape(X_so_far)[0]):
                all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])
        
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])
       
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)

        #Dealing with q_tau_tm1
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
        for k in range(np.shape(X_so_far)[0]):
            all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
        
    
        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average each per-trial result matrix over axis 0, giving one value per column.
avg_LODA_1p5_10d, avg_active_LODA_1p5_10d, avg_GLAD_1p5_10d = (
    np.mean(per_trial_auc, axis=0)
    for per_trial_auc in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

avg_nY1_LODA_1p5_10d, avg_nY1_active_LODA_1p5_10d, avg_nY1_GLAD_1p5_10d = (
    np.mean(per_trial_nY1, axis=0)
    for per_trial_nY1 in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)

# Write the averaged curves to disk; each array is stored under its own variable name.
np.savez(
    "AUC_1p5_10d.npz",
    **{
        "avg_LODA_1p5_10d": avg_LODA_1p5_10d,
        "avg_active_LODA_1p5_10d": avg_active_LODA_1p5_10d,
        "avg_GLAD_1p5_10d": avg_GLAD_1p5_10d,
        "avg_nY1_LODA_1p5_10d": avg_nY1_LODA_1p5_10d,
        "avg_nY1_active_LODA_1p5_10d": avg_nY1_active_LODA_1p5_10d,
        "avg_nY1_GLAD_1p5_10d": avg_nY1_GLAD_1p5_10d,
    },
)

# Keep the current score vectors as well (they feed a later plot).
np.savez(
    "scores_1p5_10d.npz",
    **{
        "weighted_scores_1p5_10d": weighted_scores,
        "weighted_validation_scores_1p5_10d": weighted_validation_scores,
        "X_new_final_scores_ext_1p5_10d": X_new_final_scores_ext,
    },
)

### Bring back saved data

In [None]:
# Reload the averaged results saved for the 1p5 / 10d setting.
# Each archive is opened in a `with` block so its file handle is released as soon
# as the arrays have been read (an open handle can block re-saving the same file);
# the extracted arrays remain usable after the block.

# Ours
with np.load("AUC_1p5_10d_onlyAAA.npz") as data_1p5_10d_onlyAAA:
    avg_AAA_1p5_10d = data_1p5_10d_onlyAAA["avg_AAA_1p5_10d"]
    avg_nY1_AAA_1p5_10d = data_1p5_10d_onlyAAA["avg_nY1_AAA_1p5_10d"]


# The others
with np.load("AUC_1p5_10d.npz") as data_1p5_10d:
    avg_LODA_1p5_10d = data_1p5_10d["avg_LODA_1p5_10d"]
    avg_active_LODA_1p5_10d = data_1p5_10d["avg_active_LODA_1p5_10d"]
    avg_GLAD_1p5_10d = data_1p5_10d["avg_GLAD_1p5_10d"]
    avg_nY1_LODA_1p5_10d = data_1p5_10d["avg_nY1_LODA_1p5_10d"]
    avg_nY1_active_LODA_1p5_10d = data_1p5_10d["avg_nY1_active_LODA_1p5_10d"]
    avg_nY1_GLAD_1p5_10d = data_1p5_10d["avg_nY1_GLAD_1p5_10d"]

### Plot AUC

In [None]:
# x-axis values: one per batch; Active-LODA is plotted against half as many points.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops/2) + 1))

# One figure holding the averaged AUC curve of every method.
plt.figure(figsize=(10, 6))

for _xs, _ys, _label, _marker in (
    (xx, avg_LODA_1p5_10d, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_1p5_10d, "Active-LODA", 's'),
    (xx, avg_GLAD_1p5_10d, "GLAD", '^'),
    (xx, avg_AAA_1p5_10d, "AAA", 'd'),
):
    plt.plot(_xs, _ys, label=_label, marker=_marker)

# Axis labels and one tick per batch
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)

# Legend and grid for readability
plt.legend(loc="best", fontsize=12)
plt.grid(True)

# Render the figure
plt.show()

### Plot cumulative number of anomalies detected

In [None]:
# x-axis values: one per batch; Active-LODA is plotted against half as many points.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops/2) + 1))

# Draw the averaged labeled-anomaly counts of the four methods.
for _xs, _counts, _label, _marker in (
    (xx, avg_nY1_LODA_1p5_10d, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_1p5_10d, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_1p5_10d, "GLAD", '^'),
    (xx, avg_nY1_AAA_1p5_10d, "AAA", 'd'),
):
    plt.plot(_xs, _counts, label=_label, marker=_marker)

# Axis labels and title
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")

# Legend plus a light dashed grid
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

# Render the figure
plt.show()

## Bring back saved variables and plot them

In [None]:
# Reload the averaged results of the three 10d settings (0p5, 1 and 1p5).
# Every archive is opened in a `with` block so its file handle is released once the
# arrays have been read; the extracted arrays remain usable after each block.

# --- 0p5 ---
# Ours
with np.load("AUC_0p5_10d_onlyAAA.npz") as data_0p5_10d_onlyAAA:
    avg_AAA_0p5_10d = data_0p5_10d_onlyAAA["avg_AAA_0p5_10d"]
    avg_nY1_AAA_0p5_10d = data_0p5_10d_onlyAAA["avg_nY1_AAA_0p5_10d"]
# The others
with np.load("AUC_0p5_10d.npz") as data_0p5_10d:
    avg_LODA_0p5_10d = data_0p5_10d["avg_LODA_0p5_10d"]
    avg_active_LODA_0p5_10d = data_0p5_10d["avg_active_LODA_0p5_10d"]
    avg_GLAD_0p5_10d = data_0p5_10d["avg_GLAD_0p5_10d"]
    avg_nY1_LODA_0p5_10d = data_0p5_10d["avg_nY1_LODA_0p5_10d"]
    avg_nY1_active_LODA_0p5_10d = data_0p5_10d["avg_nY1_active_LODA_0p5_10d"]
    avg_nY1_GLAD_0p5_10d = data_0p5_10d["avg_nY1_GLAD_0p5_10d"]

# --- 1 ---
# Ours
with np.load("AUC_1_10d_onlyAAA.npz") as data_1_10d_onlyAAA:
    avg_AAA_1_10d = data_1_10d_onlyAAA["avg_AAA_1_10d"]
    avg_nY1_AAA_1_10d = data_1_10d_onlyAAA["avg_nY1_AAA_1_10d"]
# The others
with np.load("AUC_1_10d.npz") as data_1_10d:
    avg_LODA_1_10d = data_1_10d["avg_LODA_1_10d"]
    avg_active_LODA_1_10d = data_1_10d["avg_active_LODA_1_10d"]
    avg_GLAD_1_10d = data_1_10d["avg_GLAD_1_10d"]
    avg_nY1_LODA_1_10d = data_1_10d["avg_nY1_LODA_1_10d"]
    avg_nY1_active_LODA_1_10d = data_1_10d["avg_nY1_active_LODA_1_10d"]
    avg_nY1_GLAD_1_10d = data_1_10d["avg_nY1_GLAD_1_10d"]

# --- 1p5 ---
# Ours
with np.load("AUC_1p5_10d_onlyAAA.npz") as data_1p5_10d_onlyAAA:
    avg_AAA_1p5_10d = data_1p5_10d_onlyAAA["avg_AAA_1p5_10d"]
    avg_nY1_AAA_1p5_10d = data_1p5_10d_onlyAAA["avg_nY1_AAA_1p5_10d"]
# The others
with np.load("AUC_1p5_10d.npz") as data_1p5_10d:
    avg_LODA_1p5_10d = data_1p5_10d["avg_LODA_1p5_10d"]
    avg_active_LODA_1p5_10d = data_1p5_10d["avg_active_LODA_1p5_10d"]
    avg_GLAD_1p5_10d = data_1p5_10d["avg_GLAD_1p5_10d"]
    avg_nY1_LODA_1p5_10d = data_1p5_10d["avg_nY1_LODA_1p5_10d"]
    avg_nY1_active_LODA_1p5_10d = data_1p5_10d["avg_nY1_active_LODA_1p5_10d"]
    avg_nY1_GLAD_1p5_10d = data_1p5_10d["avg_nY1_GLAD_1p5_10d"]

In [None]:
num_dim = 10

# Create the subplot grid (1 row, 3 columns): one AUC panel per setting
fig, axes = plt.subplots(1, 3, figsize=(18,6))
axes = axes.flatten()

# x-axis values: one per batch; Active-LODA is plotted against half as many points.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops/2) + 1))

# Iterate through the sets of arrays for AUC plots (0p5, 1, 1p5 in that order)
for i, (avg_LODA_plot, avg_active_LODA_plot, avg_GLAD_plot, avg_AAA_plot) in enumerate([
    (avg_LODA_0p5_10d, avg_active_LODA_0p5_10d, avg_GLAD_0p5_10d, avg_AAA_0p5_10d),
    (avg_LODA_1_10d, avg_active_LODA_1_10d, avg_GLAD_1_10d, avg_AAA_1_10d),
    (avg_LODA_1p5_10d, avg_active_LODA_1p5_10d, avg_GLAD_1p5_10d, avg_AAA_1p5_10d),
]):
    # The i-th setting is drawn in the i-th panel
    ax = axes[i]
    
    # Plot the data on the subplot
    ax.plot(xx, avg_LODA_plot, label="LODA",linewidth = 2.5)
    ax.plot(xxactiveLODA, avg_active_LODA_plot, label="Active-LODA",linewidth = 2.5)
    ax.plot(xx, avg_GLAD_plot, label="GLAD", linewidth = 2.5)
    ax.plot(xx, avg_AAA_plot, label="AAA", linewidth = 2.5)
    
    # Add labels and grid
    ax.set_xlabel("Batch", fontsize=12)
    ax.set_ylabel("Average AUC", fontsize=12)
    ax.grid(True)
    
    # Add legend
    ax.legend(loc="best", fontsize=10)
    
    # Panel title; (i+1)/2 evaluates to 0.5, 1.0 and 1.5 for the three panels
    ax.set_title(f"Average AUC over time (c={(i+1)/2})", fontsize=14)

    # Tick every fifth batch. (An earlier per-batch set_xticks(xx) call was dropped:
    # it was overridden by this call anyway.)
    ax.set_xticks(range(5, n_loops + 1, 5))

# Adjust layout for better spacing
plt.tight_layout()

# Save the plot:
fig.savefig("10d_Gaussian_Nominals_and_Anomalies.pdf", format="pdf", bbox_inches="tight")

# Display the plot
plt.show()

In [None]:
# 3 x 2 grid of panels: AUC curves in the left column, labeled-anomaly counts in
# the right column, one row per setting (0p5, 1, 1p5).
fig, axes = plt.subplots(3, 2, figsize=(8, 12))

# x-axis values: one per batch; Active-LODA is plotted against half as many points.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops/2) + 1))

# Shared per-method plotting info, in drawing order.
_method_labels = ("LODA", "Active-LODA", "GLAD", "AAA")
_method_xs = (xx, xxactiveLODA, xx, xx)
_count_linestyles = ('-', ':', '-.', '-')

# Left column: averaged AUC per batch.
_auc_rows = (
    (avg_LODA_0p5_10d, avg_active_LODA_0p5_10d, avg_GLAD_0p5_10d, avg_AAA_0p5_10d),
    (avg_LODA_1_10d, avg_active_LODA_1_10d, avg_GLAD_1_10d, avg_AAA_1_10d),
    (avg_LODA_1p5_10d, avg_active_LODA_1p5_10d, avg_GLAD_1p5_10d, avg_AAA_1p5_10d),
)
for i, _auc_curves in enumerate(_auc_rows):
    ax = axes[i, 0]

    for _xs, _curve, _label in zip(_method_xs, _auc_curves, _method_labels):
        ax.plot(_xs, _curve, label=_label, linewidth=2.5)

    ax.set_xlabel("Batch", fontsize=15)
    ax.set_ylabel("Average AUC", fontsize=15)
    ax.grid(True)
    ax.legend(loc="best", fontsize=13)
    ax.set_title(f"AUC over time (c = {(i+1)/2})", fontsize=17)

# Right column: averaged number of labeled anomalies per batch.
_count_rows = (
    (avg_nY1_LODA_0p5_10d, avg_nY1_active_LODA_0p5_10d, avg_nY1_GLAD_0p5_10d, avg_nY1_AAA_0p5_10d),
    (avg_nY1_LODA_1_10d, avg_nY1_active_LODA_1_10d, avg_nY1_GLAD_1_10d, avg_nY1_AAA_1_10d),
    (avg_nY1_LODA_1p5_10d, avg_nY1_active_LODA_1p5_10d, avg_nY1_GLAD_1p5_10d, avg_nY1_AAA_1p5_10d),
)
for i, _count_curves in enumerate(_count_rows):
    ax = axes[i, 1]

    for _xs, _curve, _label, _style in zip(_method_xs, _count_curves, _method_labels, _count_linestyles):
        ax.plot(_xs, _curve, label=_label, linewidth=1.5, linestyle=_style)

    ax.set_xlabel("Batch", fontsize=15)
    ax.set_ylabel("Anomalies detected", fontsize=15)
    ax.set_title("Cumul. anomalies detected", fontsize=17)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(fontsize=13)

# Tidy the spacing, write the PDF, then display.
plt.tight_layout()
fig.savefig("10d_Gaussian_Nominals_and_Anomalies2.pdf", format="pdf", bbox_inches="tight")
plt.show()

# Three further two-dimensional settings

## Two-dimensional uniform distribution with reject regions at anomalies

### Our method

In [None]:
# Fix the seed of NumPy's global random generator before the trials below, which
# draw from it (e.g. via np.random.permutation when masking labels).
np.random.seed(123456789)

In [None]:

# Uniform setting, our AAA method.
# For each trial: sample a data stream and an external validation set, plot the data,
# mask part of the initial labels, choose the LODA projections, then run AAA batch by
# batch while recording the validation AUC and the number of labeled anomalies so far.
# Afterwards the per-trial results are averaged over trials and saved to disk.

all_AAA_AUC = np.zeros((n_trials,n_loops))   # validation AUC: one row per trial, one column per batch
all_nY1_AAA = np.zeros((n_trials, n_loops))  # Array to store nY1 values

# Most recent validation predictions. Initialised here so that the save at the end of
# this cell can neither fail with a NameError nor pick up a value left behind by a
# different cell when no model was ever learned.
new_preds = np.empty((0,))

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    num_dim = 2
    a_list = [np.ones(num_dim)*0.5]
    epsilon = np.sqrt(tau)/2
    L = num_dim
    
    # Sample the data stream: n_old initial points followed by n_loops batches of size B
    X, Y = sample_data(
        uniform_sampling_point_mass_with_epsilon_n_dim_rejection,
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,  
        tau = tau, 
        a_list = a_list, 
        epsilon = epsilon, 
        L = L, 
        lower=0, 
        upper=1
    )
    
    # Sample the external validation set (5000 points, no batches) used for the AUC
    X_AUC, Y_AUC = sample_data(
        uniform_sampling_point_mass_with_epsilon_n_dim_rejection,
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,  
        tau = tau, 
        a_list = a_list, 
        epsilon = epsilon, 
        L = L, 
        lower=0, 
        upper=1
    )
    
    
    # Density plot of the sampled stream
    plt.figure(figsize=(10, 8))
    
    # Plot density of nominals (blue)
    sns.kdeplot(x=X[Y == 0][:, 0], y=X[Y == 0][:, 1], cmap="Blues", fill=True, alpha=0.6)
    
    # Plot density of anomalies (red)
    sns.kdeplot(x=X[Y == 1][:, 0], y=X[Y == 1][:, 1], cmap="Reds", fill=True, alpha=0.6)
    
    # Customizing the plot
    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Manually create legend (the density plots carry no labels of their own)
    legend_elements = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies")
    ]
    
    plt.legend(handles=legend_elements, loc='upper right')  # Manually adding legend
    
    plt.grid(True)
    plt.show()
    
    
    # Scatter plot of the same data
    plt.figure(figsize=(10, 8))
    
    # Scatter plot for nominals (blue)
    plt.scatter(X[Y == 0][:, 0], X[Y == 0][:, 1], color='blue', alpha=0.6, label="Nominals")
    
    # Scatter plot for anomalies (red)
    plt.scatter(X[Y == 1][:, 0], X[Y == 1][:, 1], color='red', alpha=0.6, label="Anomalies")
    
    # Customizing the plot
    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Adding legend
    plt.legend(loc='upper right')  # Automatically adding legend with labels from scatter plots
    
    # Adding grid
    plt.grid(True)
    
    # Showing the plot
    plt.show()
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the mask until the stopping condition below is met.
        # NOTE(review): the two tests are joined with `&`, so the loop stops as soon as
        # EITHER 2 labeled anomalies OR 98 labeled nominals remain; confirm that this
        # (rather than requiring both) is what is intended.
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide. n_hide is rounded UP,
            #so the number of labels left visible is rounded down.
            n_hide = int(np.ceil((1-u)*n_old))
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the points whose labels are hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set
    #(one column per model in `models`):
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, model in enumerate(models.values()):
        model.fit(X_AUC)
        # Convert to float64 explicitly instead of assigning to `.dtype`, which would
        # reinterpret the raw buffer if the scores were not float64 already.
        y_score = np.asarray(model.score_samples(X_AUC), dtype=np.float64)
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    
    #####################################################################################################################
    # Our trials: 
    
    #Choose a supervised method that will be applied:
    supervised_method = 'LogisticRegression'

    #Initialization

    #Dealing with edge cases:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods
    
    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)


    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 0.4,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=tau,
        )
        
        # Pretend to be the expert and add the true labels to the proposed data.
        # A new list is built (rather than extending in place) so that the list object
        # handed to ActiveAGG above is not mutated.
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1
        
        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model is available yet, record the chance-level value 0.5.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            batch_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            batch_auc = 0.5
        AAA_AUC[r] = batch_auc
        all_AAA_AUC[curr_trial,r] = batch_auc
        
        #Update indices:
        curr_L_index = curr_L_index + B
        
# Calculate column averages for each array (i.e. average over trials)
avg_AAA_unif = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_unif = np.mean(all_nY1_AAA, axis=0)


#Save these results:
np.savez("AUC_unif_onlyAAA.npz",
         avg_AAA_unif = avg_AAA_unif,
         avg_nY1_AAA_unif = avg_nY1_AAA_unif,
        )

# Save also the score matrices for later (for the plot below).
# new_preds holds the predictions from the last batch in which a model was available;
# it is an empty array if no model was learned in any batch.
np.savez("scores_unif_onlyAAA.npz",
         new_preds_unif = new_preds
        )

### The other methods

In [None]:
# Fix the seed of NumPy's global random generator before the trials below, which
# draw from it (e.g. via np.random.permutation when masking labels).
np.random.seed(123456789)

In [None]:
all_LODA_AUC = np.zeros((n_trials,n_loops))
all_active_LODA_AUC = np.zeros((n_trials,int(n_loops/2)))
all_GLAD_AUC = np.zeros((n_trials,n_loops))

all_nY1_LODA = np.zeros((n_trials, n_loops))  # Array to store nY1 values
all_nY1_active_LODA = np.zeros((n_trials, int(n_loops/2)))  # Array to store nY1 values
all_nY1_GLAD = np.zeros((n_trials, n_loops))  # Array to store nY1 values

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    num_dim = 2
    a_list = [np.ones(num_dim)*0.5]
    epsilon = np.sqrt(tau)/2
    L = num_dim
    
    # Sampling
    X, Y = sample_data(
        uniform_sampling_point_mass_with_epsilon_n_dim_rejection,
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,  
        tau = tau, 
        a_list = a_list, 
        epsilon = epsilon, 
        L = L, 
        lower=0, 
        upper=1
    )
    
    
    X_AUC, Y_AUC = sample_data(
        uniform_sampling_point_mass_with_epsilon_n_dim_rejection,
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,  
        tau = tau, 
        a_list = a_list, 
        epsilon = epsilon, 
        L = L, 
        lower=0, 
        upper=1
    )
    
    
    # Your code for plotting data
    plt.figure(figsize=(10, 8))
    
    # Plot density of nominals (blue)
    sns.kdeplot(x=X[Y == 0][:, 0], y=X[Y == 0][:, 1], cmap="Blues", fill=True, alpha=0.6)
    
    # Plot density of anomalies (red)
    sns.kdeplot(x=X[Y == 1][:, 0], y=X[Y == 1][:, 1], cmap="Reds", fill=True, alpha=0.6)
    
    # Customizing the plot
    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Manually create legend
    legend_elements = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies")
    ]
    
    plt.legend(handles=legend_elements, loc='upper right')  # Manually adding legend
    
    plt.grid(True)
    plt.show()
    
    
    plt.figure(figsize=(10, 8))
    
    # Scatter plot for nominals (blue)
    plt.scatter(X[Y == 0][:, 0], X[Y == 0][:, 1], color='blue', alpha=0.6, label="Nominals")
    
    # Scatter plot for anomalies (red)
    plt.scatter(X[Y == 1][:, 0], X[Y == 1][:, 1], color='red', alpha=0.6, label="Anomalies")
    
    # Customizing the plot
    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    
    # Adding legend
    plt.legend(loc='upper right')  # Automatically adding legend with labels from scatter plots
    
    # Adding grid
    plt.grid(True)
    
    # Showing the plot
    plt.show()
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of permute_indices are the points whose labels get hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #Calculate the unweighted scores on the massive external validation set:
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
    
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
        
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,best_m))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Sort these scores:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################
        
        #############################################################
        LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_scores)
        all_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
    #####################################################################################################################
    # ACTIVE-LODA trials:
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau
    
    #There are hyperparameters that need to be set in advance for this algorithm. However, 
    #for simplicity we assume they take the default values in the function optimize_w.
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send
    
    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.array([1/best_m for i in range(best_m)])
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then Y_muted has already been calculated
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know the true label).

    if n_old > 0:

        #Extract the initial data from X:
        X_new = X[:n_old,:]

        #Calculate the unweighted scores for each LODA projector on the initial data.
        #This is done whenever there is initial data, because BOTH label cases below index
        #into these scores (previously the "anomalies only" case read an array that was
        #never computed for the initial data).
        new_unweighted_scores = np.empty((n_old,best_m))
        for i, model in enumerate(models.values()):
            model.fit(X_new)
            #Convert (rather than reinterpret the raw bytes) to float64:
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)

        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]

        #Calculate q_tau on this initial data:
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first step,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal.
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]

        n_labeled_nominals = np.sum(Y_muted == 0)
        n_labeled_anomalies = np.sum(Y_muted == 1)

        if n_labeled_nominals > 0:
            #At least one labeled nominal (following Das et al. (2016)):
            H_N = new_unweighted_scores[(Y_muted==0),:]

            if n_labeled_anomalies == 0:
                #Das et al. (2016) allow for the case that there are no labeled anomalies,
                #only labeled nominals. Set the "fake anomaly" alert to 1 and take the data
                #point whose score sits at the quantile position as a proxy for an anomaly.
                fake_anomaly = 1
                H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            else:
                #There is at least one labeled anomaly:
                H_A = new_unweighted_scores[(Y_muted==1),:]

            #Update the linear combination coefficients and normalize them to unit L2 norm:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            w_old = w_new / np.linalg.norm(w_new)

        elif n_labeled_anomalies > 0:
            #Initial data with labeled anomalies but no labeled nominals: Das et al. (2016)
            #provide no details for what to do here. The weights are left untouched, but H_A
            #has to be initialized even though H_N stays empty. Boolean-mask indexing always
            #returns a 2-D (k, best_m) array, so no reshape is needed when k == 1.
            H_A = new_unweighted_scores[(Y_muted==1),:]
                
    #So, at this point, either we had no old data and w_old is still equal weights,
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may
    #have been added to (or not), while w_old may or may not have already been
    #updated.

    #We next move to the batch data.

    curr_L_index = n_old
    #Only the first int(n_loops/2) entries get filled, since the loop below runs over
    #half of the batches.
    active_LODA_AUC = [0]*n_loops

    #Labels already known from the initial data (entries of Y_muted that are not NaN):
    which_lab = [i for i, y in enumerate(Y_muted) if not np.isnan(y)]
    Y_lab = [Y_muted[j] for j in which_lab]

    for r in range(int(n_loops/2)):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        #Update for the next loop:
        curr_L_index = curr_R_index

        #Unweighted scores of every LODA projector on the current batch:
        new_unweighted_scores = np.empty((B,best_m))
        for i, model in enumerate(models.values()):
            model.fit(X_new)
            #Convert (rather than reinterpret the raw bytes) to float64:
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Calculate the weighted scores on the external validation set with the current
        #value of w_old, and the corresponding AUC (computed once, stored twice):
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        curr_AUC = roc_auc_score(Y_AUC,weighted_validation_scores)
        active_LODA_AUC[r] = curr_AUC
        all_active_LODA_AUC[curr_trial,r] = curr_AUC

        #Append the scores of everything labeled so far. These rows come AFTER the batch,
        #so their indices are B, B+1, ... and they can be recognised below.
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])

        #Final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Sort the scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)

        #Following the methodology in Das et al. (2016), we should provide the highest
        #scoring data-point to an expert for labeling. To be slightly more general, the
        #top_k highest scoring points OF THE CURRENT BATCH (index < B) are sent instead.
        #Walking the ranking from the top and cutting at top_k can never run off the
        #array, even if fewer than top_k batch points are available.
        top_k_indices = [idx for idx in sorted_indices[::-1] if idx < B][:top_k]

        #Now we go and get the labels in Y_new associated with these indices:
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1

        #If H_A currently only holds a proxy ("fake") anomaly, drop it: it is replaced
        #either by the real anomalies found just now or by a fresh proxy further down.
        if fake_anomaly == 1:
            H_A = np.empty((0, best_m))
            if np.any(Y_expert == 1):
                #A real anomaly was just found, so the proxy is no longer needed:
                fake_anomaly = 0

        #Append the unweighted scores of the newly labeled points to H_A or H_N:
        for j, batch_index in enumerate(top_k_indices):
            labeled_row = new_unweighted_scores[batch_index:(batch_index+1),:]
            if Y_expert[j]==1:
                H_A = np.concatenate([H_A,labeled_row])
            else:
                H_N = np.concatenate([H_N,labeled_row])

        #We always have to retain the possibility that all of the labeled data so far
        #are nominals.
        if np.shape(H_A)[0]==0:
            #Take the data point whose score sits at the quantile position as a proxy
            #for an anomaly, and raise the alert so that this proxy is dropped again in
            #the next loop instead of being kept as if it were a real labeled anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            fake_anomaly = 1

        #Update the linear combination coefficients.
        #NOTE(review): q_tau is read here before this loop ever assigns it, so it must
        #already have been set by the initial-data step; with no initial data the first
        #iteration would fail or reuse a value from a previous trial - TODO confirm.
        w_new = optimize_w_2(H_A, H_N, q_tau)

        #Update q_tau on this data (it is used by the optimization of the NEXT loop):
        q_tau = sorted_scores[my_quantile_sorted_index]

        #Update w_old, normalized to unit L2 norm:
        w_old = w_new / np.linalg.norm(w_new)

    #####################################################################################################################
    # GLAD trials:
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    #Sentinel meaning "no quantile computed yet"; the batch loop compares against this
    #exact value.
    q_tau_tm1 = -10e7

    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)

    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])

    # Train the model towards the constant target b.
    # We use the same n_min used earlier to find the number of LODA projections, capped at
    # the number of available rows so that inputs and targets always have the same length:
    n_pretrain = min(n_min, np.shape(X)[0])
    y_true = np.full((n_pretrain, best_m), b)
    model_GLAD.fit(X[:n_pretrain,:], y_true, epochs=10, batch_size=32,verbose=0)

    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2]
    output = model_GLAD.predict(row)

    #INITIALIZATION
    #Start from "nothing labeled, nothing scored"; this is the final state when n_old == 0.
    X_lab = np.empty([0,np.shape(X)[1]])
    Y_lab = []
    all_labeled_scores = np.empty([0,best_m])
    all_unweighted_scores = np.empty([0,best_m])

    if n_old > 0:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]

        #Score ALL of the old data, whether or not any of it is labeled: the batch loop
        #pairs row k of all_unweighted_scores with row k of X[:n_old + ...], so the first
        #n_old rows must always be present.
        all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
        for i, model in enumerate(models.values()):
            model.fit(X_old)
            #Convert (rather than reinterpret the raw bytes) to float64:
            y_score = np.asarray(model.score_samples(X_old), dtype=np.float64)
            all_unweighted_scores[:,i] = y_score.squeeze()

        if len(which_lab) > 0:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Deal with indices of X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        # Calculate all of the scores for X_new:
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, model in enumerate(models.values()):
            model.fit(X_new)
            #Convert (rather than reinterpret the raw bytes) to float64:
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            all_scores[:,i] = y_score.squeeze()

        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])

        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Per-point final score: projector scores weighted by the network output for that point
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)

        # Rank the batch and keep the top_k highest-scoring points. The slice is taken
        # relative to the actual batch length, so it is also valid if top_k exceeds it:
        sorted_indices = np.argsort(X_new_final_scores)
        top_k_indices = sorted_indices[max(len(sorted_indices)-top_k, 0):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])

        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])

        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels

        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1

        # Score the external validation set with the current network (one dot product per row):
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.zeros((np.shape(X_AUC)[0],))
        for k in range(np.shape(X_AUC)[0]):
            X_new_final_scores_ext[k] = np.matmul(new_unweighted_validation_scores[k,:],curr_w_ext[k,:])

        # AUC on the validation set (computed once, stored twice):
        curr_AUC = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        GLAD_AUC[r] = curr_AUC
        all_GLAD_AUC[curr_trial,r] = curr_AUC

        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        #Check if this is the first time through (q_tau_tm1 still holds its sentinel value):
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
            for k in range(np.shape(X_so_far)[0]):
                all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],all_weights_so_far[k,:])

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]

        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])

        # Re-compile with a loss that ignores its (y_true, y_pred) arguments and instead
        # evaluates new_custom_loss_2 on the labeled data assembled above:
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)

        #Dealing with q_tau_tm1: recompute it with the freshly trained network
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
        for k in range(np.shape(X_so_far)[0]):
            all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],all_weights_so_far[k,:])

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]

        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average every per-trial result array over its rows (one row per trial), which leaves
# one value per batch.
avg_LODA_unif, avg_active_LODA_unif, avg_GLAD_unif = (
    np.mean(per_trial, axis=0)
    for per_trial in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

avg_nY1_LODA_unif, avg_nY1_active_LODA_unif, avg_nY1_GLAD_unif = (
    np.mean(per_trial, axis=0)
    for per_trial in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)

# Save these results, each array stored under its own variable name:
auc_summary_unif = {
    "avg_LODA_unif": avg_LODA_unif,
    "avg_active_LODA_unif": avg_active_LODA_unif,
    "avg_GLAD_unif": avg_GLAD_unif,
    "avg_nY1_LODA_unif": avg_nY1_LODA_unif,
    "avg_nY1_active_LODA_unif": avg_nY1_active_LODA_unif,
    "avg_nY1_GLAD_unif": avg_nY1_GLAD_unif,
}
np.savez("AUC_unif.npz", **auc_summary_unif)

# Save also the score vectors for later (for the plot below). These are whatever the
# variables hold once the loops above have finished.
score_snapshot_unif = {
    "weighted_scores_unif": weighted_scores,
    "weighted_validation_scores_unif": weighted_validation_scores,
    "X_new_final_scores_ext_unif": X_new_final_scores_ext,
}
np.savez("scores_unif.npz", **score_snapshot_unif)

### Bring back saved variables

In [None]:
# Results of our method (AAA), read back from the archive saved for it
data_unif_onlyAAA = np.load("AUC_unif_onlyAAA.npz")
avg_AAA_unif, avg_nY1_AAA_unif = (
    data_unif_onlyAAA[key] for key in ("avg_AAA_unif", "avg_nY1_AAA_unif")
)

# Results of the other methods (LODA, active-LODA, GLAD)
data_unif = np.load("AUC_unif.npz")
avg_LODA_unif, avg_active_LODA_unif, avg_GLAD_unif = (
    data_unif[key]
    for key in ("avg_LODA_unif", "avg_active_LODA_unif", "avg_GLAD_unif")
)
avg_nY1_LODA_unif, avg_nY1_active_LODA_unif, avg_nY1_GLAD_unif = (
    data_unif[key]
    for key in ("avg_nY1_LODA_unif", "avg_nY1_active_LODA_unif", "avg_nY1_GLAD_unif")
)

### Plot AUC

In [None]:
# Batch numbers on the x-axis. Active-LODA has results for the first half of the batches only.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

# One curve of average AUC per method, drawn in a fixed order
plt.figure(figsize=(10, 6))

auc_curves = (
    (xx, avg_LODA_unif, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_unif, "Active-LODA", 's'),
    (xx, avg_GLAD_unif, "GLAD", '^'),
    (xx, avg_AAA_unif, "AAA", 'd'),
)
for batches, avg_auc, method_label, method_marker in auc_curves:
    plt.plot(batches, avg_auc, label=method_label, marker=method_marker)

# Axis labels, one tick per batch, legend and grid
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)
plt.legend(loc="best", fontsize=12)
plt.grid(True)

# Show the plot
plt.show()

### Plot cumulative number of anomalies detected

In [None]:
# Batch numbers on the x-axis. Active-LODA has results for the first half of the batches only.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

# One curve per method: average number of labeled anomalies accumulated after each batch
cumulative_curves = (
    (xx, avg_nY1_LODA_unif, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_unif, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_unif, "GLAD", '^'),
    (xx, avg_nY1_AAA_unif, "AAA", 'd'),
)
for batches, avg_found, method_label, method_marker in cumulative_curves:
    plt.plot(batches, avg_found, label=method_label, marker=method_marker)

# Labels, title, legend and a light dashed grid
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

# Show the plot
plt.show()

## Two-dimensional uniform distribution on circle with reject regions at anomalies

### Our method

In [None]:
np.random.seed(123456789)

In [None]:

# Per-trial storage, one row per trial and one column per batch: the AUC of AAA and the
# number of labeled anomalies (nY1) after each batch.
all_AAA_AUC, all_nY1_AAA = (np.zeros((n_trials, n_loops)) for _ in range(2))

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Dimension of the data:
    num_dim = 2
    # Arguments specific to this sampler:
    a_list = [np.array([0.0, 0.0])]
    L = num_dim
    radius = 1
    epsilon = radius*np.exp((1/L)*(np.log(tau)-np.log(1-tau)))

    # Keyword arguments shared by the two sampling calls below
    sampler_kwargs = dict(tau=tau, a_list=a_list, epsilon=epsilon, L=L, radius=radius)

    # Experiment data: n_old initial points followed by n_loops batches of size B
    X, Y = sample_data(
        sample_uniform_with_anomalies_in_ball,
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        **sampler_kwargs
    )

    # External validation set used for the AUC: 5000 initial points and no batches
    X_AUC, Y_AUC = sample_data(
        sample_uniform_with_anomalies_in_ball,
        n_old=5000,
        B=0,
        n_loops=0,
        **sampler_kwargs
    )
    
    
    # Plot the sampled data. Split it once into nominals (Y == 0) and anomalies (Y == 1):
    nominal_pts = X[Y == 0]
    anomaly_pts = X[Y == 1]

    # Figure 1: filled 2D density estimates, nominals in blue and anomalies in red
    plt.figure(figsize=(10, 8))
    for pts, colormap in ((nominal_pts, "Blues"), (anomaly_pts, "Reds")):
        sns.kdeplot(x=pts[:, 0], y=pts[:, 1], cmap=colormap, fill=True, alpha=0.6)

    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # The density plots are not given labels here, so the legend is built by hand
    legend_elements = [
        Line2D([0], [0], color=line_color, lw=2, label=line_label)
        for line_color, line_label in (('blue', "Nominals"), ('red', "Anomalies"))
    ]
    plt.legend(handles=legend_elements, loc='upper right')

    plt.grid(True)
    plt.show()

    # Figure 2: scatter plot of the same points; the legend comes from the scatter labels
    plt.figure(figsize=(10, 8))
    scatter_groups = (
        (nominal_pts, 'blue', "Nominals"),
        (anomaly_pts, 'red', "Anomalies"),
    )
    for pts, point_color, point_label in scatter_groups:
        plt.scatter(pts[:, 0], pts[:, 1], color=point_color, alpha=0.6, label=point_label)

    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()
    
    
    # Masking the labels of the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    elif n_old > 0:
        # These quantities do not change between attempts, so compute them once.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # How many labels to hide: if u > 0 then at least one label stays visible
        # because of the ceiling function.
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        # Start values chosen so that the loop body runs at least once.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Draw a new random mask until 2 labeled anomalies or 98 labeled nominals remain.
        # NOTE(review): the loop stops as soon as EITHER count matches; this only means
        # "exactly 2 anomalies and 98 nominals" if exactly 100 labels stay visible - confirm.
        while n_remaining_anomalies != 2 and n_remaining_nominals != 98:
            # Hide the labels of the first n_hide indices of a random permutation:
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            # Y_muted is a float copy of the old labels with NaN at the hidden positions:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            # The rows of X belonging to the anomalies whose label is still visible:
            X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this (or all of them, if fewer).
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)

    #Calculate the unweighted scores on the massive external validation set, one column
    #per LODA model:
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, model in enumerate(models.values()):
        model.fit(X_AUC)
        #Convert the scores to float64. Assigning to .dtype would instead reinterpret the
        #raw bytes, which is only harmless when the array is float64 already.
        y_score = np.asarray(model.score_samples(X_AUC), dtype=np.float64)
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    
    #####################################################################################################################
    # Our trials: 
    
    #Choose a supervised method that will be applied:
    supervised_method = 'LogisticRegression'

    #Initialization

    #Dealing with edge cases: without initial data, None is passed for both arguments.
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods

    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)

    #Deal with indices of X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new, X_old = X_old, X_lab = X_lab, Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores, models=models,
            supervised_method = supervised_method, n_data_min = 100, n_data_max = B,
            min_n_labeled = 5, n_send=n_send, pc_top = 0.4, min_n_nom=5, min_n_anom=1,
            tau_exp=tau)

        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1

        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model was returned (learned_model is None), 0.5 is recorded instead.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            curr_AUC = roc_auc_score(Y_AUC,new_preds)
        else:
            curr_AUC = 0.5
        AAA_AUC[r] = curr_AUC
        all_AAA_AUC[curr_trial,r] = curr_AUC

        #Update indices:
        curr_L_index = curr_L_index + B

# Average the per-trial results over the trials (rows), leaving one value per batch
avg_AAA_circ, avg_nY1_AAA_circ = (
    np.mean(per_trial, axis=0) for per_trial in (all_AAA_AUC, all_nY1_AAA)
)

# Save these results, each array stored under its own variable name:
auc_summary_circ = {
    "avg_AAA_circ": avg_AAA_circ,
    "avg_nY1_AAA_circ": avg_nY1_AAA_circ,
}
np.savez("AUC_circ_onlyAAA.npz", **auc_summary_circ)

# Save also the predictions for later (for the plot below); new_preds holds the most
# recent predictions made on the external validation set.
np.savez("scores_circ_onlyAAA.npz", **{"new_preds_circ": new_preds})

### The other methods

In [None]:
# Seed NumPy's global random generator before the comparison runs below
# (np.random.permutation in the masking step draws from this generator).
np.random.seed(123456789)

In [None]:
# Result holders, one row per trial and one column per batch. Active-LODA is
# only run for the first int(n_loops/2) batches, hence its narrower arrays.
full_shape = (n_trials, n_loops)
half_shape = (n_trials, int(n_loops/2))

all_LODA_AUC = np.zeros(full_shape)
all_active_LODA_AUC = np.zeros(half_shape)
all_GLAD_AUC = np.zeros(full_shape)

# Cumulative number of labeled anomalies (nY1) after each batch:
all_nY1_LODA = np.zeros(full_shape)
all_nY1_active_LODA = np.zeros(half_shape)
all_nY1_GLAD = np.zeros(full_shape)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Dimension of the data:
    num_dim = 2
    # Arguments forwarded to the sampler. Algebraically,
    # epsilon = radius * (tau/(1-tau))**(1/L).
    a_list = [np.array([0.0, 0.0])]
    L = num_dim
    radius = 1
    epsilon = radius*np.exp((1/L)*(np.log(tau)-np.log(1-tau)))

    sampler_kwargs = dict(tau=tau, a_list=a_list, epsilon=epsilon, L=L, radius=radius)

    # Stream used by the methods: n_old initial points followed by n_loops batches of size B.
    X, Y = sample_data(
        sample_uniform_with_anomalies_in_ball,
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        **sampler_kwargs,
    )

    # Independent external validation set (5000 points, no batches) used only for the AUC.
    X_AUC, Y_AUC = sample_data(
        sample_uniform_with_anomalies_in_ball,
        n_old=5000,
        B=0,
        n_loops=0,
        **sampler_kwargs,
    )
    
    
    # Split the sample once by true label; column 0 goes on the x-axis and
    # column 1 on the y-axis in both figures.
    nominal_pts = X[Y == 0]
    anomaly_pts = X[Y == 1]

    # Figure 1: filled 2-D kernel density estimates (nominals in blue,
    # anomalies in red).
    plt.figure(figsize=(10, 8))
    for pts, cmap_name in ((nominal_pts, "Blues"), (anomaly_pts, "Reds")):
        sns.kdeplot(x=pts[:, 0], y=pts[:, 1], cmap=cmap_name, fill=True, alpha=0.6)

    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # The legend is assembled by hand from proxy line handles:
    legend_elements = [
        Line2D([0], [0], color=colour, lw=2, label=text)
        for colour, text in (('blue', "Nominals"), ('red', "Anomalies"))
    ]
    plt.legend(handles=legend_elements, loc='upper right')

    plt.grid(True)
    plt.show()

    # Figure 2: the same points as a scatter plot; here the legend is derived
    # automatically from the labels given to the scatter calls.
    plt.figure(figsize=(10, 8))
    for pts, colour, text in (
        (nominal_pts, 'blue', "Nominals"),
        (anomaly_pts, 'red', "Anomalies"),
    ):
        plt.scatter(pts[:, 0], pts[:, 1], color=colour, alpha=0.6, label=text)

    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()
    
    
    # Hide the labels of a random subset of the initial ("old") data. Y_muted
    # holds the true label where it stays visible and NaN where it is hidden.
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)

    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the random mask until a draw leaves exactly 2 visible
        # anomalies or exactly 98 visible nominals (the loop stops as soon as
        # either count is reached).
        while n_remaining_anomalies != 2 and n_remaining_nominals != 98:
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies

            # Number of labels to hide. This count is rounded UP, so the
            # number of labels that stay visible (n_old - n_hide) is rounded down.
            n_hide = int(np.ceil((1-u)*n_old))
            n_hide_pc = (n_hide/n_old)*100

            # Hide the first n_hide positions of a fresh random permutation:
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[:n_hide]

            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan

            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')

            # Rows of X whose anomaly label is still visible:
            X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
    # Choose the LODA ensemble using the first min(n_min, total number of
    # points) rows of X. best_m is the number of detectors kept; it fixes the
    # number of score columns used everywhere below.
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)

    # Unweighted score of every detector on the external validation set
    # (one column per detector). Each detector is fitted on X_AUC before scoring.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (_, model) in enumerate(models.items()):

        model.fit(X_AUC)
        # Convert the VALUES to float64. Assigning to `.dtype` instead would
        # reinterpret the raw buffer, which is only correct if the scores are
        # already float64.
        y_score = np.asarray(model.score_samples(X_AUC), dtype=np.float64)
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS

    curr_L_index = n_old

    # Plain LODA scores on the validation set: the unweighted mean over detectors.
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    # These validation scores never change inside the batch loop (plain LODA
    # does not learn from labels), so the AUC is the same for every batch and
    # is computed once here.
    loda_validation_auc = roc_auc_score(Y_AUC,weighted_scores)
    LODA_AUC = [0]*n_loops

    # Labels that are visible in the initial data (non-NaN entries of Y_muted):
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        curr_L_index = curr_R_index

        # Unweighted detector scores on the new batch (detectors are re-fitted
        # on the batch before scoring it):
        new_unweighted_scores = np.empty((B,best_m))
        for i, (_, model) in enumerate(models.items()):

            model.fit(X_new)
            # Value conversion to float64 (not an in-place dtype reinterpretation).
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        # Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        # Send the top_k highest-scoring points of the batch to the (simulated)
        # expert and track the cumulative number of labeled anomalies.
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1

        # Record the (constant) validation AUC for this batch:
        LODA_AUC[r] = loda_validation_auc
        all_LODA_AUC[curr_trial,r] = loda_validation_auc

    #####################################################################################################################
    # ACTIVE-LODA trials:

    # Quantile level used to pick the score threshold q_tau. It has to be set
    # before the optimisation is called. It is set to the mixture parameter tau
    # here; 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau

    # The other hyperparameters of the optimisation (C_A = 100 and C_eta = 1000
    # in their article) are not passed explicitly; presumably optimize_w_2 uses
    # its own defaults for them.

    # In active LODA, the whole budget of n_send items per loop is dedicated to
    # sending the items with the highest predicted scores:
    top_k = n_send

    # Initialise the weights as equal and summing to 1 over the best_m detectors:
    w_old = np.array([1/best_m for i in range(best_m)])

    # Unweighted scores of the labeled anomalies (H_A) and labeled nominals (H_N):
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))

    # fake_anomaly == 1 means H_A currently holds a single PROXY anomaly (the
    # point at the C_tau score quantile) because no real labeled anomaly exists yet.
    fake_anomaly = 0

    # Score threshold from the previous step. Reset for every trial so that a
    # value from an earlier trial can never leak in; None means "not computed yet".
    q_tau = None

    # Unlike basic LODA, here anomaly and nominal labels MATTER. If n_old > 0,
    # Y_muted (computed earlier) tells us which of the initial points have a
    # visible label.
    has_labeled_nominal = n_old > 0 and np.sum(Y_muted == 0) > 0
    has_labeled_anomaly = n_old > 0 and np.sum(Y_muted == 1) > 0

    if has_labeled_nominal or has_labeled_anomaly:
        # Score the initial data with every detector. This is done for BOTH
        # branches below so that the boolean masks built from Y_muted always
        # index rows of the initial data.
        X_new = X[:n_old,:]
        new_unweighted_scores = np.empty((n_old,best_m))
        for i, (_, model) in enumerate(models.items()):
            model.fit(X_new)
            # Value conversion to float64 (not an in-place dtype reinterpretation).
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

    # If there were initial data and at least one labeled nominal (following Das et al. (2016))
    if has_labeled_nominal:

        # Weighted scores of the initial data, sorted from smallest to largest:
        new_scores = np.matmul(new_unweighted_scores,w_old)
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]

        # Calculate q_tau on this initial data:
        # WARNING: one of the underlying problems with active-LODA is that it basically expects
        # anomalies to have the highest scores from the get go. But here, in this first step,
        # it very well could be that the anomalies have all the LOWEST scores. The calculation
        # of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        # expects positive weights, especially in its minimization step, with its L2 norm penalty
        # on the weights. This means that active-LODA does not reach its true potential, as
        # defined and coded by Das et al. (2016). Without a penalty keeping the weights not too
        # big, not too small, and not necessarily positive, it will remain suboptimal.
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]

        if not has_labeled_anomaly:
            # Das et al. (2016) allow for labeled nominals without any labeled
            # anomaly: the point at the quantile position acts as a proxy anomaly.
            fake_anomaly = 1
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
        else:
            H_A = new_unweighted_scores[(Y_muted==1),:]
        H_N = new_unweighted_scores[(Y_muted==0),:]

        # Update the linear combination coefficients and rescale to unit L2 norm:
        w_new = optimize_w_2(H_A, H_N, q_tau)
        w_old = w_new / np.linalg.norm(w_new)

    # Else there were initial data with labeled anomalies but no labeled
    # nominals. Das et al. (2016) give no details for this case: the weights
    # are left unchanged, but H_A still has to be initialised. Boolean-mask row
    # selection always returns a 2-D (k, best_m) array, including for k == 1.
    elif has_labeled_anomaly:
        H_A = new_unweighted_scores[(Y_muted==1),:]

    # At this point either w_old is still the equal-weights vector with H_A/H_N
    # possibly empty, or it has been updated from the initial labeled data.

    # We next move to the batch data.

    curr_L_index = n_old
    # NOTE: sized n_loops, but only the first int(n_loops/2) entries are filled below.
    active_LODA_AUC = [0]*n_loops

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    for r in range(int(n_loops/2)):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        # Update for the next loop:
        curr_L_index = curr_R_index

        # Unweighted detector scores on the new batch:
        new_unweighted_scores = np.empty((B,best_m))
        for i, (_, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        # Validation AUC with the CURRENT weights (i.e. before learning from
        # this batch); computed once and stored in both containers.
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        validation_auc = roc_auc_score(Y_AUC,weighted_validation_scores)
        active_LODA_AUC[r] = validation_auc
        all_active_LODA_AUC[curr_trial,r] = validation_auc

        # Append the already-labeled rows after the batch rows, so that batch
        # points keep indices 0..B-1 and labeled points get indices >= B.
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])

        # Active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        # Sort from smallest to largest, whilst retaining the indices:
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)

        # Following Das et al. (2016) the highest-scoring point goes to the
        # expert; slightly more generally, we send the top_k highest-scoring
        # points that belong to the current batch (index < B), best first.
        batch_ranked = sorted_indices[sorted_indices < B]
        top_k_indices = batch_ranked[::-1][:top_k]

        # True labels of the points sent to the expert:
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = list(Y_expert)
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1

        # While H_A only holds a proxy anomaly, drop the proxy: it is either
        # superseded by a real anomaly found in this batch (and the flag is
        # cleared for good) or re-created from the current scores below.
        if fake_anomaly == 1:
            H_A = np.empty((0, best_m))
            if np.sum(Y_expert == 1) > 0:
                fake_anomaly = 0

        # Append the unweighted scores of the newly labeled points to H_A / H_N
        # (order within each group follows top_k_indices):
        expert_rows = new_unweighted_scores[top_k_indices,:]
        expert_is_anomaly = (Y_expert == 1)
        H_A = np.concatenate([H_A,expert_rows[expert_is_anomaly]])
        H_N = np.concatenate([H_N,expert_rows[~expert_is_anomaly]])

        # If there is still no labeled anomaly at all, use the point at the
        # quantile position as a proxy anomaly, and flag it as such so that it
        # is discarded again at the next step instead of being kept as real.
        if np.shape(H_A)[0]==0:
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            fake_anomaly = 1

        # The optimisation uses the threshold from the PREVIOUS step. If none
        # exists yet (no initialisation on old data), fall back to the
        # threshold of the current scores.
        if q_tau is None:
            q_tau = sorted_scores[my_quantile_sorted_index]

        # Update the linear combination coefficients:
        w_new = optimize_w_2(H_A, H_N, q_tau)

        # Update q_tau on this data (used at the next step):
        q_tau = sorted_scores[my_quantile_sorted_index]

        # Update w_old, rescaled to unit L2 norm:
        w_old = w_new / np.linalg.norm(w_new)
    
    #####################################################################################################################
    # GLAD trials:
    # A neural network maps each data point to one weight per LODA detector; a
    # point's final score is the weighted sum of its detector scores. After
    # every batch the top_k highest-scoring points are labeled by the simulated
    # expert and the network is retrained on all labeled points.
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    # Sentinel meaning "no quantile threshold computed yet" (tested in the
    # first-time check inside the batch loop):
    q_tau_tm1 = -10e7
    
    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    # Train the model
    # We use the same n_min used earlier to find the number of LODA projections:
    # (the training target is the constant b for every output)
    y_true = np.full((n_min, best_m), b)
    model_GLAD.fit(X[:n_min,:], y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b:
    # (`output` is not used again in this section)
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Dealing with edge cases:
    # No initial data: start with empty labeled sets and empty score matrices.
    if n_old == 0:
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])
    
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        # Positions of the initial points whose label is visible (not NaN):
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]
    
        if len(which_lab) == 0:
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = [] 
            all_unweighted_scores = np.empty([0,best_m])
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            # Detector scores of ALL initial points (one column per detector):
            all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_old)
                y_score = model.score_samples(X_old)
                # NOTE(review): assigning to .dtype reinterprets the buffer rather than
                # converting values; harmless only if the scores are already float64.
                y_score.dtype = np.float64
                all_unweighted_scores[:,i] = y_score.squeeze()
    
            # Keep the rows of the labeled initial points:
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        # Number of true anomalies in the batch (not used further in this section):
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new:
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            all_scores[:,i] = y_score.squeeze()
    
        #Update:
        # all_unweighted_scores accumulates the detector scores of every point seen so far.
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score of each batch point: row-wise sum of detector score times its weight.
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)
    
        # Sort these scores:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        # The top_k highest-scoring points of the batch go to the expert:
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        # Score the external validation set with the current network (i.e. before
        # it is retrained on this batch's labels) and record the AUC.
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.zeros((np.shape(X_AUC)[0],))
        for k in range(np.shape(X_AUC)[0]):
            X_new_final_scores_ext[k] = np.matmul(new_unweighted_validation_scores[k,:],np.transpose(curr_w_ext[k,:]))
        
        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
    
        # ROC curve and its area (fpr, tpr and roc_auc are not used again in this section):
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        #Check if this is the first time through:
        # (q_tau_tm1 still holds its sentinel, so a threshold is computed from the
        # not-yet-retrained network before it is needed by the loss below)
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
            for k in range(np.shape(X_so_far)[0]):
                all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

            # Calculate the quantile index without fully sorting
            # (C_tau is the quantile level set in the active-LODA section above)
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]
 
        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        #    so that every labeled anomaly appears five times in the training arrays.
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])
        
        # Recompile with the active-learning loss. Note that the lambda does not
        # forward y_true / y_pred: the loss is evaluated from the arrays passed
        # explicitly here.
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)
        
        #Dealing with q_tau_tm1
        # Recompute the threshold with the retrained network; it is used at the next batch.
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
        for k in range(np.shape(X_so_far)[0]):
            all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
        
    
        #Update indices:
        curr_L_index = curr_L_index + B

# Average every result matrix over trials (rows), giving one value per batch.
avg_LODA_circ, avg_active_LODA_circ, avg_GLAD_circ = (
    np.mean(per_trial, axis=0)
    for per_trial in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

avg_nY1_LODA_circ, avg_nY1_active_LODA_circ, avg_nY1_GLAD_circ = (
    np.mean(per_trial, axis=0)
    for per_trial in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)

# Persist the averaged curves:
circ_summary = {
    "avg_LODA_circ": avg_LODA_circ,
    "avg_active_LODA_circ": avg_active_LODA_circ,
    "avg_GLAD_circ": avg_GLAD_circ,
    "avg_nY1_LODA_circ": avg_nY1_LODA_circ,
    "avg_nY1_active_LODA_circ": avg_nY1_active_LODA_circ,
    "avg_nY1_GLAD_circ": avg_nY1_GLAD_circ,
}
np.savez("AUC_circ.npz", **circ_summary)

# Persist the validation-set scores left over from the final trial (for the
# plot below):
circ_scores = {
    "weighted_scores_circ": weighted_scores,
    "weighted_validation_scores_circ": weighted_validation_scores,
    "X_new_final_scores_ext_circ": X_new_final_scores_ext,
}
np.savez("scores_circ.npz", **circ_scores)

### Bring back saved data

In [None]:
# Reload the averaged curves written by the cells above.

# Our method (AAA):
data_circ_onlyAAA = np.load("AUC_circ_onlyAAA.npz")
avg_AAA_circ, avg_nY1_AAA_circ = (
    data_circ_onlyAAA[key] for key in ("avg_AAA_circ", "avg_nY1_AAA_circ")
)

# The competing methods (LODA, active-LODA, GLAD):
data_circ = np.load("AUC_circ.npz")
avg_LODA_circ, avg_active_LODA_circ, avg_GLAD_circ = (
    data_circ[key]
    for key in ("avg_LODA_circ", "avg_active_LODA_circ", "avg_GLAD_circ")
)
avg_nY1_LODA_circ, avg_nY1_active_LODA_circ, avg_nY1_GLAD_circ = (
    data_circ[key]
    for key in ("avg_nY1_LODA_circ", "avg_nY1_active_LODA_circ", "avg_nY1_GLAD_circ")
)

### Plot AUC

In [None]:
# Batch numbers on the x-axis (1-based). Active-LODA only has results for the
# first int(n_loops/2) batches, so it gets its own, shorter axis.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

plt.figure(figsize=(10, 6))

# One trial-averaged AUC curve per method: (x values, y values, label, marker)
auc_curves = [
    (xx, avg_LODA_circ, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_circ, "Active-LODA", 's'),
    (xx, avg_GLAD_circ, "GLAD", '^'),
    (xx, avg_AAA_circ, "AAA", 'd'),
]
for batches, curve, method, mark in auc_curves:
    plt.plot(batches, curve, label=method, marker=mark)

plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)

# One tick per batch:
plt.xticks(ticks=xx)

plt.legend(loc="best", fontsize=12)
plt.grid(True)
plt.show()

### Plot cumulative number of anomalies detected

In [None]:
# Batch numbers on the x-axis (1-based); active-LODA only covers the first
# int(n_loops/2) batches.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

# Trial-averaged cumulative number of labeled anomalies per method:
# (x values, y values, label, marker)
count_curves = [
    (xx, avg_nY1_LODA_circ, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_circ, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_circ, "GLAD", '^'),
    (xx, avg_nY1_AAA_circ, "AAA", 'd'),
]
for batches, curve, method, mark in count_curves:
    plt.plot(batches, curve, label=method, marker=mark)

plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")

plt.legend()

# Light dashed grid for readability:
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## Two-dimensional Gaussian with anomalies on edge of a circle

### Our method

In [None]:
# Seed NumPy's global random generator before the runs of this experiment
# (np.random.permutation in the masking step draws from this generator).
np.random.seed(123456789)

In [None]:
# Result holders for the AAA method: one row per trial, one column per batch.
result_shape = (n_trials, n_loops)
all_AAA_AUC = np.zeros(result_shape)
all_nY1_AAA = np.zeros(result_shape)  # cumulative number of labeled anomalies (nY1)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    # Dimension of the data:
    num_dim = 2
    # Arguments forwarded to the sampler: the nominal Gaussian has zero mean
    # and identity covariance.
    nominal_mean = np.array([0, 0])
    nominal_cov = np.array([[1.0, 0], [0, 1.0]])
    L = num_dim
    radius = 1

    sampler_kwargs = dict(
        tau=tau,
        nominal_mean=nominal_mean,
        nominal_cov=nominal_cov,
        L=L,
        radius=radius,
    )

    # Stream used by the method: n_old initial points followed by n_loops batches of size B.
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_uniform_surface_anomalies,
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        **sampler_kwargs,
    )

    # Independent external validation set (5000 points, no batches) used only for the AUC.
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_uniform_surface_anomalies,
        n_old=5000,
        B=0,
        n_loops=0,
        **sampler_kwargs,
    )
    
    
    # Split the sample once by true label; column 0 goes on the x-axis and
    # column 1 on the y-axis in both figures.
    nominal_pts = X[Y == 0]
    anomaly_pts = X[Y == 1]

    # Figure 1: filled 2-D kernel density estimates (nominals in blue,
    # anomalies in red).
    plt.figure(figsize=(10, 8))
    for pts, cmap_name in ((nominal_pts, "Blues"), (anomaly_pts, "Reds")):
        sns.kdeplot(x=pts[:, 0], y=pts[:, 1], cmap=cmap_name, fill=True, alpha=0.6)

    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # The legend is assembled by hand from proxy line handles:
    legend_elements = [
        Line2D([0], [0], color=colour, lw=2, label=text)
        for colour, text in (('blue', "Nominals"), ('red', "Anomalies"))
    ]
    plt.legend(handles=legend_elements, loc='upper right')

    plt.grid(True)
    plt.show()

    # Figure 2: the same points as a scatter plot; here the legend is derived
    # automatically from the labels given to the scatter calls.
    plt.figure(figsize=(10, 8))
    for pts, colour, text in (
        (nominal_pts, 'blue', "Nominals"),
        (anomaly_pts, 'red', "Anomalies"),
    ):
        plt.scatter(pts[:, 0], pts[:, 1], color=colour, alpha=0.6, label=text)

    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()
    
    
    # Masking the old data, if it exists: Y_muted is a float copy of the first
    # n_old labels in which hidden labels are replaced by NaN.
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)

    if n_old > 0:
        # None of these depend on the random draw, so compute them once
        # instead of on every retry. The counts are bookkeeping only and are
        # not used further in this cell.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # Number of labels to hide: the fraction (1-u) of the old data, rounded up.
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the mask until 2 labeled anomalies / 98 labeled nominals remain.
        # NOTE(review): the loop stops as soon as EITHER count reaches its target.
        # Assuming Y only holds 0/1, the number of visible labels is always
        # n_old - n_hide, so the two conditions coincide only when that number
        # is 100 -- confirm the value of u used for this section.
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)

        #Get the values of X corresponding to the unmuted anomalies. Only the
        #accepted mask matters, so this is built once after the retry loop.
        X_with_anomalies = [X[i,:] for i in np.flatnonzero(Y_muted == 1)]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this (or all of X if
    #it has fewer than n_min rows).
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)

    #Calculate the unweighted scores on the massive external validation set:
    #one column per model in `models`.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
        # Each model is (re)fitted on X_AUC before scoring it.
        model.fit(X_AUC)
        # Convert to float64 by value. Assigning to `.dtype` would reinterpret
        # the raw bytes instead of converting them.
        y_score = np.asarray(model.score_samples(X_AUC), dtype=np.float64)
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # Our trials:

    # Supervised learner passed by name to ActiveAGG in the batch loop:
    supervised_method = 'LogisticRegression'

    # Initialization. With no initial data, both inputs are None; otherwise the
    # first n_old rows of X are used together with the masked labels Y_muted
    # (as in all the other methods).
    X_old = None if n_old == 0 else X[:n_old,:]
    Y_old = None if n_old == 0 else Y_muted.tolist()

    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(
        X_old = X_old,
        Y_old = Y_old,
        n_data_min = 100,
        models=models,
    )

    # The batches start right after the initial data in X:
    curr_L_index = n_old

    # Per-batch AUC scores of the current trial:
    AAA_AUC = [0 for _ in range(n_loops)]
    
    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        # Number of true anomalies in this batch (diagnostic only).
        nYnew1 = Y_new.count(1)

        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 0.4,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=tau,
        )

        # Pretend to be the expert and add the true labels to the proposed data.
        # A new list is built so the previous Y_lab object is never mutated.
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1

        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model is returned, the batch is recorded as chance level (0.5).
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            AAA_AUC[r] = roc_auc_score(Y_AUC,new_preds)
        else:
            AAA_AUC[r] = 0.5
        # Compute the AUC once and store it in both places.
        all_AAA_AUC[curr_trial,r] = AAA_AUC[r]

        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average over trials (rows), leaving one value per batch.
avg_AAA_ring = all_AAA_AUC.mean(axis=0)
avg_nY1_AAA_ring = all_nY1_AAA.mean(axis=0)

# Store the averaged curves on disk.
np.savez(
    "AUC_ring_onlyAAA.npz",
    avg_AAA_ring=avg_AAA_ring,
    avg_nY1_AAA_ring=avg_nY1_AAA_ring,
)

# Also store the most recent validation-set predictions for a later plot.
# NOTE(review): new_preds is whatever the last successful predict_proba call
# produced (last trial only), and is undefined if no model was ever returned.
np.savez(
    "scores_ring_onlyAAA.npz",
    new_preds_ring=new_preds,
)

### The other methods

In [None]:
# Reset NumPy's global random generator to the same seed used before the AAA
# trials (the next cell draws from it via np.random.permutation).
np.random.seed(123456789)

In [None]:
# Result tables, one row per trial and one column per batch. Active-LODA is
# only run for the first int(n_loops/2) batches, hence its narrower tables.
all_LODA_AUC = np.zeros(shape=(n_trials, n_loops))
all_active_LODA_AUC = np.zeros(shape=(n_trials, int(n_loops/2)))
all_GLAD_AUC = np.zeros(shape=(n_trials, n_loops))

# Cumulative number of labeled anomalies (nY1) per method:
all_nY1_LODA = np.zeros(shape=(n_trials, n_loops))
all_nY1_active_LODA = np.zeros(shape=(n_trials, int(n_loops/2)))
all_nY1_GLAD = np.zeros(shape=(n_trials, n_loops))

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials)

    # Settings handed to the sampler: a standard 2-D Gaussian for the nominals,
    # plus the sampler-specific arguments L and radius.
    num_dim = 2
    L = num_dim
    radius = 1
    nominal_mean = np.zeros(num_dim, dtype=int)
    nominal_cov = np.eye(num_dim)

    # Main data set: n_old initial points followed by n_loops batches of size B.
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_uniform_surface_anomalies,
        n_old=n_old,
        B=B,
        n_loops=n_loops,
        tau=tau,
        nominal_mean=nominal_mean,
        nominal_cov=nominal_cov,
        L=L,
        radius=radius,
    )

    # Separate sample of 5000 points (no batches) used later to compute the AUC.
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_uniform_surface_anomalies,
        n_old=5000,
        B=0,
        n_loops=0,
        tau=tau,
        nominal_mean=nominal_mean,
        nominal_cov=nominal_cov,
        L=L,
        radius=radius,
    )
    
    
    # Diagnostic plots of the sampled data, split by true label.
    nominal_pts = X[Y == 0]
    anomaly_pts = X[Y == 1]

    # Figure 1: filled 2-D density estimates (nominals in blue, anomalies in red).
    plt.figure(figsize=(10, 8))
    for grp_pts, grp_cmap in ((nominal_pts, "Blues"), (anomaly_pts, "Reds")):
        sns.kdeplot(x=grp_pts[:, 0], y=grp_pts[:, 1], cmap=grp_cmap, fill=True, alpha=0.6)

    plt.title("Density Plot (2D)", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)

    # The legend is assembled from proxy lines rather than from the KDE artists.
    legend_elements = [
        Line2D([0], [0], color=grp_color, lw=2, label=grp_label)
        for grp_color, grp_label in (('blue', "Nominals"), ('red', "Anomalies"))
    ]
    plt.legend(handles=legend_elements, loc='upper right')

    plt.grid(True)
    plt.show()

    # Figure 2: scatter plot of the same points; the legend comes from the labels.
    plt.figure(figsize=(10, 8))
    for grp_pts, grp_color, grp_label in (
        (nominal_pts, 'blue', "Nominals"),
        (anomaly_pts, 'red', "Anomalies"),
    ):
        plt.scatter(grp_pts[:, 0], grp_pts[:, 1], color=grp_color, alpha=0.6, label=grp_label)

    plt.title("Scatter Plot of Nominals and Anomalies", fontsize=16)
    plt.xlabel("X-axis", fontsize=14)
    plt.ylabel("Y-axis", fontsize=14)
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()
    
    
    # Masking the old data, if it exists: Y_muted is a float copy of the first
    # n_old labels in which hidden labels are replaced by NaN.
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)

    if n_old > 0:
        # None of these depend on the random draw, so compute them once
        # instead of on every retry. The counts are bookkeeping only and are
        # not used further in this cell.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # Number of labels to hide: the fraction (1-u) of the old data, rounded up.
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the mask until 2 labeled anomalies / 98 labeled nominals remain.
        # NOTE(review): the loop stops as soon as EITHER count reaches its target.
        # Assuming Y only holds 0/1, the number of visible labels is always
        # n_old - n_hide, so the two conditions coincide only when that number
        # is 100 -- confirm the value of u used for this section.
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)

        #Get the values of X corresponding to the unmuted anomalies. Only the
        #accepted mask matters, so this is built once after the retry loop.
        X_with_anomalies = [X[i,:] for i in np.flatnonzero(Y_muted == 1)]
    
    #Getting the LODA models and the scores on the external validation set:
    #Simply use the first n_min data-points in X to do this (or all of X if
    #it has fewer than n_min rows).
    models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)

    #Calculate the unweighted scores on the massive external validation set:
    #one column per model in `models`.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],best_m))
    for i, (name, model) in enumerate(models.items()):
        # Each model is (re)fitted on X_AUC before scoring it.
        model.fit(X_AUC)
        # Convert to float64 by value. Assigning to `.dtype` would reinterpret
        # the raw bytes instead of converting them.
        y_score = np.asarray(model.score_samples(X_AUC), dtype=np.float64)
        new_unweighted_validation_scores[:,i] = y_score.squeeze()
    
    #####################################################################################################################
    # LODA TRIALS
    #
    # Plain LODA: the final score is the unweighted mean over the detectors.
    # Labels obtained from the expert are only counted, never used for learning.

    curr_L_index = n_old

    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    # weighted_scores is not modified anywhere in the batch loop, so the
    # validation AUC is the same for every batch: compute it (and the ROC
    # curve) once instead of on every iteration.
    LODA_AUC_value = roc_auc_score(Y_AUC,weighted_scores)
    LODA_AUC = [LODA_AUC_value]*n_loops
    all_LODA_AUC[curr_trial,:] = LODA_AUC_value

    fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
    # Calculate AUC
    roc_auc = auc(fpr, tpr)

    # Start from the labels that were left visible in the initial data:
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    # Number of items sent to the expert per batch:
    top_k = n_send

    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        curr_L_index = curr_L_index + B

        new_unweighted_scores = np.empty((B,best_m))

        for i, (name, model) in enumerate(models.items()):

            # Each model is (re)fitted on the batch before scoring it.
            model.fit(X_new)
            # Convert by value; assigning to `.dtype` would reinterpret bytes.
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Sort these scores (ascending) and send the top_k highest to the expert:
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################

    #####################################################################################################################
    # ACTIVE-LODA trials:

    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016); here it is set
    #to the mixture parameter tau instead.
    C_tau = tau

    #There are hyperparameters that need to be set in advance for this algorithm. However,
    #for simplicity we assume they take the default values in the optimization function.
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article

    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the
    #items with highest predicted scores:
    top_k = n_send

    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.

    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.full(best_m, 1/best_m)

    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))

    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0

    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    n_init_labeled_nominals = np.sum(Y_muted == 0) if n_old > 0 else 0
    n_init_labeled_anomalies = np.sum(Y_muted == 1) if n_old > 0 else 0

    #NOTE(review): if the block below is skipped (no initial data, or no initial labels),
    #q_tau is not assigned here, although the batch loop that follows reads q_tau before
    #assigning it.
    if n_init_labeled_nominals > 0 or n_init_labeled_anomalies > 0:

        #Extract the initial data from X:
        X_new = X[:n_old,:]

        #Calculate the unweighted scores for each LODA projector on the initial data.
        #This is done whenever ANY initial label exists, so that both branches below
        #index scores of the initial data (and not stale scores of an earlier batch).
        new_unweighted_scores = np.empty((n_old,best_m))

        for i, (name, model) in enumerate(models.items()):

            model.fit(X_new)
            # Convert by value; assigning to `.dtype` would reinterpret bytes.
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)

        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]

        #Calculate q_tau on this initial data:
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first loop,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal.
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]

        #If there is at least one labeled nominal (following Das et al. (2016)):
        if n_init_labeled_nominals > 0:

            H_N = new_unweighted_scores[(Y_muted==0),:]

            #In Das et al. (2016) they allow for the case that there are no labeled
            #anomalies, only labeled nominals.
            if n_init_labeled_anomalies == 0:
                #Set a "fake anomaly alert to 1":
                fake_anomaly = 1
                #We take the data point corresponding to the score sorted into the
                #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
                H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            else:
                #There is at least one labeled anomaly:
                H_A = new_unweighted_scores[(Y_muted==1),:]

            #Update the linear combination coefficients, then rescale them to unit L2 norm:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            w_old = w_new
            w_old = w_old / np.linalg.norm(w_old)

        #Else there are initial labeled anomalies but no labeled nominals: Das et al. (2016)
        #provide no details for what to do here. We only initialize H_A (H_N stays empty and
        #w_old keeps its equal weights). Boolean-mask indexing always returns a 2-D array,
        #so no reshape is needed even for a single anomaly.
        else:
            H_A = new_unweighted_scores[(Y_muted==1),:]
                
    #So, at this point, either we had no old data and w_old is still equal weights,
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may
    #have been added to (or not), while w_old may or may not have already been
    #updated.

    #We next move to the batch data.

    curr_L_index = n_old
    # NOTE(review): this list has n_loops slots but only the first int(n_loops/2)
    # are filled, because the loop below runs over half of the batches.
    active_LODA_AUC = [0]*n_loops

    # Start from the labels that were left visible in the initial data:
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    for r in range(int(n_loops/2)):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        # Number of true anomalies in this batch (diagnostic only).
        n_new_true_anom = sum(Y_new==1)

        #Update for the next loop:
        curr_L_index = curr_L_index + B

        new_unweighted_scores = np.empty((B,best_m))

        for i, (name, model) in enumerate(models.items()):

            # Each model is (re)fitted on the batch before scoring it.
            model.fit(X_new)
            # Convert by value; assigning to `.dtype` would reinterpret bytes.
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Temporary final active-LODA scores are a linear combination over anomaly detectors:
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Calculated the weighted scores on the external validation set with the current
        #value of w_old (i.e. BEFORE this batch's labels are used):
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        # Compute the AUC once and store it in both places.
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = active_LODA_AUC[r]

        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        # Append the scores of all previously labeled points after the B batch rows.
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])

        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        temp_Y = Y_new.tolist() + [1]*np.shape(H_A)[0] + [0]*np.shape(H_N)[0]

        #Following the methodology in Das et al. (2016), we should provide the
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we shall instead provide the top_k scoring data
        #points to the expert, where top_k has been pre-defined.

        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #Remember that these may include items from the previous loop or from the initialization.
        #However, since we appended those on to the end, we know their indices will be B, B+1,...
        #so we will be able to look out for them.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)

        #We now have to go through the sorted_indices from the end back towards the
        #beginning until we manage to gather top_k indices which are less than or equal to B-1.
        #NOTE(review): this assumes top_k <= B; otherwise curr_index becomes negative
        #and wraps around to the end of sorted_indices.
        top_k_indices = []
        n_indices_so_far = 0
        curr_index = len(sorted_indices)
        while n_indices_so_far < top_k:
            curr_index = curr_index - 1
            next_top_index = sorted_indices[curr_index]
            if next_top_index < B:
                top_k_indices.append(next_top_index)
                n_indices_so_far = n_indices_so_far + 1

        #Now we go and get the labels in Y_new associated with these indices:
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1

        #Now is a good time to check whether we just found at least one real anomaly while
        #up to now we only had one fake anomaly:
        if sum(Y_expert==1) > 0 and fake_anomaly==1:
            #We now reset H_A to be empty:
            H_A = np.empty((0, best_m))
            #And we set fake_anomaly to 0 forever:
            fake_anomaly = 0

        if sum(Y_expert==1) == 0 and fake_anomaly==1:
            #We reset H_A back to empty again (a new proxy is chosen below):
            H_A = np.empty((0, best_m))

        #We then need to append the relevant unweighted scores to the current H_A and H_N
        for j in range(top_k):
            if Y_expert[j]==1:
                H_A = np.concatenate([H_A,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
            else:
                H_N = np.concatenate([H_N,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])

        #Now since top_k > 0 we know that there is a positive number of labeled data for sure.
        #We always have to retain however the possibility that all of the labeled data so far
        #are nominals.
        if np.shape(H_A)[0]==0:
            #We take the data point corresponding to the score sorted into the
            #quantile position computed above as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)

        #Update the linear combination coefficients. The q_tau used here is the one
        #from the previous round (or from the initialization).
        #NOTE(review): q_tau is only assigned before this loop when the initial data
        #contained labeled nominals -- confirm it is always defined on first use.
        w_new = optimize_w_2(H_A, H_N, q_tau)

        #Update q_tau on this data (to be used in the next round):
        q_tau = sorted_scores[my_quantile_sorted_index]

        #update w_old, rescaled to unit L2 norm
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        # Diagnostics on the labeled points (not used further in this cell):
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN
    
    #####################################################################################################################
    # GLAD trials:

    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    # Sentinel meaning "no quantile computed yet"; the batch loop compares
    # q_tau_tm1 against this exact value on its first pass.
    q_tau_tm1 = -10e7

    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)

    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])

    # Train the model
    # We use the same n_min used earlier to find the number of LODA projections.
    # The target has one row per training row, so it is capped at the number of
    # rows actually available in X (X[:n_min] is shorter than n_min otherwise).
    y_true = np.full((min(n_min, np.shape(X)[0]), best_m), b)
    model_GLAD.fit(X[:n_min,:], y_true, epochs=10, batch_size=32,verbose=0)

    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2]
    output = model_GLAD.predict(row)

    #INITIALIZATION
    #Dealing with edge cases:
    if n_old == 0:
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])

    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]

        # Score ALL of the initial data, labeled or not: the batch loop pairs row k
        # of all_unweighted_scores with row k of X[:n_old + (r+1)*B], so the
        # initial rows must be present even when none of them is labeled.
        all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_old)
            # Convert by value; assigning to `.dtype` would reinterpret bytes.
            y_score = np.asarray(model.score_samples(X_old), dtype=np.float64)
            all_unweighted_scores[:,i] = y_score.squeeze()

        if len(which_lab) == 0:
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = []
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Deal with indices of X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        # Number of true anomalies in this batch (diagnostic only).
        nYnew1 = Y_new.count(1)

        # Calculate all of the scores for X_new:
        all_scores = np.empty([np.shape(X_new)[0],best_m])
        for i, (name, model) in enumerate(models.items()):
            # Each model is (re)fitted on the batch before scoring it.
            model.fit(X_new)
            # Convert by value; assigning to `.dtype` would reinterpret bytes.
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            all_scores[:,i] = y_score.squeeze()

        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])

        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score of a point = sum over detectors of (score * weight).
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)

        # Sort these scores (ascending) and take the top_k highest:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])

        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])

        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels

        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1

        # Score the external validation set with the current network weights
        # (row-wise dot product, vectorized in the same way as for X_new above).
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores * curr_w_ext, axis=1)

        # Compute the AUC once and store it in both places.
        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = GLAD_AUC[r]

        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        #Check if this is the first time through (q_tau_tm1 still holds its sentinel):
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1: weighted scores of all data seen so far.
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.sum(
                all_unweighted_scores[:np.shape(X_so_far)[0],:] * all_weights_so_far, axis=1
            )

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]

        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])

        # Re-compile with a loss that closes over the current labeled data and quantile.
        # The model is fitted immediately below, before any of these names is rebound.
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)

        #Dealing with q_tau_tm1: recompute the quantile with the updated network,
        #to be used in the next round.
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.sum(
            all_unweighted_scores[:np.shape(X_so_far)[0],:] * all_weights_so_far, axis=1
        )

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]

        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average over trials (rows), leaving one value per batch.
avg_LODA_ring = all_LODA_AUC.mean(axis=0)
avg_active_LODA_ring = all_active_LODA_AUC.mean(axis=0)
avg_GLAD_ring = all_GLAD_AUC.mean(axis=0)

avg_nY1_LODA_ring = all_nY1_LODA.mean(axis=0)
avg_nY1_active_LODA_ring = all_nY1_active_LODA.mean(axis=0)
avg_nY1_GLAD_ring = all_nY1_GLAD.mean(axis=0)

# Store the averaged curves on disk.
np.savez(
    "AUC_ring.npz",
    avg_LODA_ring=avg_LODA_ring,
    avg_active_LODA_ring=avg_active_LODA_ring,
    avg_GLAD_ring=avg_GLAD_ring,
    avg_nY1_LODA_ring=avg_nY1_LODA_ring,
    avg_nY1_active_LODA_ring=avg_nY1_active_LODA_ring,
    avg_nY1_GLAD_ring=avg_nY1_GLAD_ring,
)

# Also store the validation-set score vectors for a later plot. These hold
# whatever the variables contain after the loop, i.e. the last trial only.
np.savez(
    "scores_ring.npz",
    weighted_scores_ring=weighted_scores,
    weighted_validation_scores_ring=weighted_validation_scores,
    X_new_final_scores_ext_ring=X_new_final_scores_ext,
)

### Bring back saved variables

In [None]:
# AAA (our method): averaged AUC curve and cumulative labeled-anomaly counts.
data_ring_onlyAAA = np.load("AUC_ring_onlyAAA.npz")
avg_AAA_ring, avg_nY1_AAA_ring = (
    data_ring_onlyAAA[key] for key in ("avg_AAA_ring", "avg_nY1_AAA_ring")
)

# Competing methods (LODA, Active-LODA, GLAD): same two quantities per method.
data_ring = np.load("AUC_ring.npz")
avg_LODA_ring, avg_active_LODA_ring, avg_GLAD_ring = (
    data_ring[key]
    for key in ("avg_LODA_ring", "avg_active_LODA_ring", "avg_GLAD_ring")
)
avg_nY1_LODA_ring, avg_nY1_active_LODA_ring, avg_nY1_GLAD_ring = (
    data_ring[key]
    for key in ("avg_nY1_LODA_ring", "avg_nY1_active_LODA_ring", "avg_nY1_GLAD_ring")
)

### Plot AUC

In [None]:
# Batch numbers on the x-axis. The Active-LODA curve is drawn against the
# first half of the batch range only.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops / 2) + 1))

plt.figure(figsize=(10, 6))

# One averaged AUC curve per method: (x values, y values, legend label, marker).
ring_auc_curves = (
    (xx, avg_LODA_ring, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_ring, "Active-LODA", 's'),
    (xx, avg_GLAD_ring, "GLAD", '^'),
    (xx, avg_AAA_ring, "AAA", 'd'),
)
for x_vals, y_vals, curve_label, curve_marker in ring_auc_curves:
    plt.plot(x_vals, y_vals, label=curve_label, marker=curve_marker)

# Axis labels (no title is set on this figure).
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)

# One tick per batch.
plt.xticks(ticks=xx)

plt.legend(loc="best", fontsize=12)
plt.grid(True)
plt.show()

### Plot cumulative number of anomalies detected

In [None]:
# Batch numbers on the x-axis. The Active-LODA curve is drawn against the
# first half of the batch range only.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops / 2) + 1))

# One averaged cumulative-count curve per method:
# (x values, y values, legend label, marker).
ring_count_curves = (
    (xx, avg_nY1_LODA_ring, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_ring, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_ring, "GLAD", '^'),
    (xx, avg_nY1_AAA_ring, "AAA", 'd'),
)
for x_vals, y_vals, curve_label, curve_marker in ring_count_curves:
    plt.plot(x_vals, y_vals, label=curve_label, marker=curve_marker)

# Axis labels and title.
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")

plt.legend()

# Light dashed grid.
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## Bring back all saved variables and plot them

In [None]:
# For each of the three 2-D scenarios (unif, circ, ring) reload the averaged
# AUC curves and cumulative labeled-anomaly counts: first the AAA archive
# ("*_onlyAAA.npz"), then the archive holding LODA, Active-LODA and GLAD.

# ---- unif ----
data_unif_onlyAAA = np.load("AUC_unif_onlyAAA.npz")
avg_AAA_unif, avg_nY1_AAA_unif = (
    data_unif_onlyAAA[key] for key in ("avg_AAA_unif", "avg_nY1_AAA_unif")
)
data_unif = np.load("AUC_unif.npz")
avg_LODA_unif, avg_active_LODA_unif, avg_GLAD_unif = (
    data_unif[key]
    for key in ("avg_LODA_unif", "avg_active_LODA_unif", "avg_GLAD_unif")
)
avg_nY1_LODA_unif, avg_nY1_active_LODA_unif, avg_nY1_GLAD_unif = (
    data_unif[key]
    for key in ("avg_nY1_LODA_unif", "avg_nY1_active_LODA_unif", "avg_nY1_GLAD_unif")
)

# ---- circ ----
data_circ_onlyAAA = np.load("AUC_circ_onlyAAA.npz")
avg_AAA_circ, avg_nY1_AAA_circ = (
    data_circ_onlyAAA[key] for key in ("avg_AAA_circ", "avg_nY1_AAA_circ")
)
data_circ = np.load("AUC_circ.npz")
avg_LODA_circ, avg_active_LODA_circ, avg_GLAD_circ = (
    data_circ[key]
    for key in ("avg_LODA_circ", "avg_active_LODA_circ", "avg_GLAD_circ")
)
avg_nY1_LODA_circ, avg_nY1_active_LODA_circ, avg_nY1_GLAD_circ = (
    data_circ[key]
    for key in ("avg_nY1_LODA_circ", "avg_nY1_active_LODA_circ", "avg_nY1_GLAD_circ")
)

# ---- ring ----
data_ring_onlyAAA = np.load("AUC_ring_onlyAAA.npz")
avg_AAA_ring, avg_nY1_AAA_ring = (
    data_ring_onlyAAA[key] for key in ("avg_AAA_ring", "avg_nY1_AAA_ring")
)
data_ring = np.load("AUC_ring.npz")
avg_LODA_ring, avg_active_LODA_ring, avg_GLAD_ring = (
    data_ring[key]
    for key in ("avg_LODA_ring", "avg_active_LODA_ring", "avg_GLAD_ring")
)
avg_nY1_LODA_ring, avg_nY1_active_LODA_ring, avg_nY1_GLAD_ring = (
    data_ring[key]
    for key in ("avg_nY1_LODA_ring", "avg_nY1_active_LODA_ring", "avg_nY1_GLAD_ring")
)



In [None]:
def _draw_density_panel(ax, X, Y, title, title_size, label_size):
    """Fill ``ax`` with filled KDE plots of the nominal and anomalous points.

    Rows of ``X`` where ``Y == 0`` are drawn with the "Blues" colormap and rows
    where ``Y == 1`` with "Reds"; only the first two columns of ``X`` are used.
    The panel then gets its title, axis labels, grid and a two-entry legend.
    Returns the list of legend handles that was attached to the panel.
    """
    nominal_pts = X[Y == 0]
    anomaly_pts = X[Y == 1]
    sns.kdeplot(x=nominal_pts[:, 0], y=nominal_pts[:, 1], cmap="Blues", fill=True, alpha=0.6, ax=ax)
    sns.kdeplot(x=anomaly_pts[:, 0], y=anomaly_pts[:, 1], cmap="Reds", fill=True, alpha=0.6, ax=ax)

    ax.set_title(title, fontsize=title_size)
    ax.set_xlabel("X-axis", fontsize=label_size)
    ax.set_ylabel("Y-axis", fontsize=label_size)
    ax.grid(True)

    # Proxy lines so the legend shows one colour per class.
    handles = [
        Line2D([0], [0], color='blue', lw=2, label="Nominals"),
        Line2D([0], [0], color='red', lw=2, label="Anomalies"),
    ]
    ax.legend(handles=handles, loc='upper right', fontsize=14)
    return handles


# 3 x 3 grid: one row per scenario (unif, circ, ring); the columns hold the
# data density, the average AUC curves and the cumulative anomaly counts.
fig, axes = plt.subplots(3, 3, figsize=(12, 12))

# Batch numbers for the x-axes (Active-LODA only spans the first half).
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops / 2) + 1))

################################################################################
# Row 0 - Unif: draw a fresh sample and show its density.

num_dim = 2
a_list = [np.ones(num_dim) * 0.5]
epsilon = np.sqrt(tau) / 2
L = num_dim

X, Y = sample_data(
    uniform_sampling_point_mass_with_epsilon_n_dim_rejection,
    n_old=n_old,          # Initial number of data points
    B=B,                  # Batch size
    n_loops=n_loops,
    tau=tau,
    a_list=a_list,
    epsilon=epsilon,
    L=L,
    lower=0,
    upper=1,
)

ax = axes[0, 0]
legend_elements = _draw_density_panel(ax, X, Y, "Density Plot", 14, 12)

################################################################################
# Row 1 - Circ: draw a fresh sample and show its density.

num_dim = 2
a_list = [np.array([0.0, 0.0])]
L = num_dim
radius = 1
epsilon = radius * np.exp((1 / L) * (np.log(tau) - np.log(1 - tau)))

X, Y = sample_data(
    sample_uniform_with_anomalies_in_ball,
    n_old=n_old,          # Initial number of data points
    B=B,                  # Batch size
    n_loops=n_loops,
    tau=tau,
    a_list=a_list,
    epsilon=epsilon,
    L=L,
    radius=radius,
)

ax = axes[1, 0]
legend_elements = _draw_density_panel(ax, X, Y, "Density Plot", 14, 12)

################################################################################
# Row 2 - Ring: draw a fresh sample and show its density.

num_dim = 2
nominal_mean = np.array([0, 0])
nominal_cov = np.array([[1.0, 0], [0, 1.0]])
L = num_dim
radius = 1

X, Y = sample_data(
    multivariate_gaussian_sampling_with_uniform_surface_anomalies,
    n_old=n_old,          # Initial number of data points
    B=B,                  # Batch size
    n_loops=n_loops,
    tau=tau,
    nominal_mean=nominal_mean,
    nominal_cov=nominal_cov,
    L=L,
    radius=radius,
)

ax = axes[2, 0]
legend_elements = _draw_density_panel(ax, X, Y, "Density plot", 17, 15)

################################################################################
# Column 1 - average AUC curves, one row per scenario.

auc_curve_sets = [
    (avg_LODA_unif, avg_active_LODA_unif, avg_GLAD_unif, avg_AAA_unif),
    (avg_LODA_circ, avg_active_LODA_circ, avg_GLAD_circ, avg_AAA_circ),
    (avg_LODA_ring, avg_active_LODA_ring, avg_GLAD_ring, avg_AAA_ring),
]
for i, (avg_LODA_plot, avg_active_LODA_plot, avg_GLAD_plot, avg_AAA_plot) in enumerate(auc_curve_sets):
    ax = axes[i, 1]

    for x_vals, y_vals, curve_label in (
        (xx, avg_LODA_plot, "LODA"),
        (xxactiveLODA, avg_active_LODA_plot, "Active-LODA"),
        (xx, avg_GLAD_plot, "GLAD"),
        (xx, avg_AAA_plot, "AAA"),
    ):
        ax.plot(x_vals, y_vals, label=curve_label, linewidth=1.5)

    # Tick positions are left to matplotlib's defaults.
    ax.set_xlabel("Batch", fontsize=15)
    ax.set_ylabel("Average AUC", fontsize=15)
    ax.grid(True)
    ax.legend(loc="best", fontsize=13)
    ax.set_title("Average AUC over time", fontsize=17)

################################################################################
# Column 2 - cumulative anomaly counts, one row per scenario.

count_curve_sets = [
    (avg_nY1_LODA_unif, avg_nY1_active_LODA_unif, avg_nY1_GLAD_unif, avg_nY1_AAA_unif),
    (avg_nY1_LODA_circ, avg_nY1_active_LODA_circ, avg_nY1_GLAD_circ, avg_nY1_AAA_circ),
    (avg_nY1_LODA_ring, avg_nY1_active_LODA_ring, avg_nY1_GLAD_ring, avg_nY1_AAA_ring),
]
for i, (avg_nY1_LODA_plot, avg_nY1_active_LODA_plot, avg_nY1_GLAD_plot, avg_nY1_AAA_plot) in enumerate(count_curve_sets):
    ax = axes[i, 2]

    # Each method gets its own line style.
    for x_vals, y_vals, curve_label, curve_style in (
        (xx, avg_nY1_LODA_plot, "LODA", '-'),
        (xxactiveLODA, avg_nY1_active_LODA_plot, "Active-LODA", ':'),
        (xx, avg_nY1_GLAD_plot, "GLAD", '-.'),
        (xx, avg_nY1_AAA_plot, "AAA", '-'),
    ):
        ax.plot(x_vals, y_vals, label=curve_label, linewidth=1.5, linestyle=curve_style)

    ax.set_xlabel("Batch", fontsize=15)
    ax.set_ylabel("Anomalies detected", fontsize=15)
    ax.set_title("Cumul. anomalies detected", fontsize=17)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(fontsize=13)

# Tighten the spacing, write the figure to disk, then display it.
plt.tight_layout()
fig.savefig("2d_Scenarios2.pdf", format="pdf", bbox_inches="tight")
plt.show()

# Score function trials

## Global argument values

There are a number of arguments which are required for all of the data generation methods. At this point in the script we shall give them actual default values. In the interest of specific simulation schemes, we may change these values further below to gain a better understanding of certain results. 

In [None]:
# Data stream layout: number of initial ("old") data points, number of data
# points in each future batch, and number of future batches.
n_old, B, n_loops = 1000, 500, 200

# Mixture parameter. No mixture is needed for the data generation in this
# part of the script, hence zero.
tau = 0.00

# Value of n_send used for every method.
n_send = 5

# n_min, required for GLAD.
n_min = 1000

# Upper bound on the number of LODA projections.
M_max = 15

# Pevny (2015) uses 0.01 as the default for this parameter; in low-dimensional
# settings (e.g. d=2) that may not be a good choice, so a larger value is used.
tau_M = 0.1

# Proportion of the initial data whose true anomaly status is assumed known.
u = 0.1

# Simulation trials per setting, and number of models.
n_trials, n_models = 5, 10

## The random score way

Here we simply assign to every data point, for each of a set of "models", a uniform random score between 0 and 1. We then choose one of the models and a subinterval of [0,1] (in the code below: the first model, and the interval of half-width `tau` centred at `my_centre`), and give the anomaly label 1 to the points whose score from that model falls in this interval and the nominal label 0 to all the rest. Thus the scores are linked with the anomaly labels for this one model only.

In [None]:
# Fix the seed of NumPy's global random generator before the AAA trials below.
np.random.seed(123456789)

### Our method

In [None]:
# Per-trial results: one row per trial, one column per batch.
all_AAA_AUC = np.zeros((n_trials,n_loops))
all_nY1_AAA = np.zeros((n_trials, n_loops))  # cumulative number of labeled anomalies


def _score_with_models(models, data):
    """Fit every model in ``models`` on ``data`` and collect its scores.

    Returns an array with one row per row of ``data`` and one column per
    model, in the iteration order of the ``models`` dictionary.
    """
    score_matrix = np.zeros((data.shape[0], len(models)))
    for col, model in enumerate(models.values()):
        model.fit(data)
        y_score = model.score_samples(data)
        # Kept as in the original: assigning to .dtype reinterprets the
        # existing buffer in place; it does not cast the values.
        y_score.dtype = np.float64
        score_matrix[:, col] = y_score.squeeze()
    return score_matrix


for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials)

    # Centre of the score interval that defines anomalies
    # (candidates considered: 0.05, 0.25, 0.5, 0.75, 0.95).
    my_centre = 0.5

    # No mixture is wanted while sampling, so tau is zero for the two
    # sample_data calls below and only restored afterwards.
    tau = 0

    # Arguments of the Gaussian sampler.
    a_list = [[0.5,0.5]]
    anomaly_cov_list = [
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])          # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)

    # Arguments shared by the two sampling calls.
    sampler_kwargs = dict(
        tau=tau,                            # Fraction of anomalies
        a_list=a_list,                      # Anomaly means
        anomaly_cov_list=anomaly_cov_list,  # Anomaly covariance matrices
        nominal_mean=nominal_mean,          # Nominal mean
        nominal_cov=nominal_cov,            # Nominal covariance matrix
        L=L,                                # Dimensionality
    )

    # Initial data followed by n_loops batches of size B.
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=n_old, B=B, n_loops=n_loops,
        **sampler_kwargs
    )

    # Separate external data set (5000 points, no batches) used only for the AUC.
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians,
        n_old=5000, B=0, n_loops=0,
        **sampler_kwargs
    )

    # Restore tau to its true value.
    tau = 0.01

    curr_method = "RandomScore"

    # Build the models dictionary.
    models = build_random_score_models(M=n_models)
    best_m = n_models

    # Score the stream first, then the external data (the models are refit on each).
    new_unweighted_scores_fixed = _score_with_models(models, X)
    new_unweighted_validation_scores = _score_with_models(models, X_AUC)

    # (Re)define the labels from the scores of the first model: a point is an
    # anomaly (1.0) when that score lies in [my_centre - tau, my_centre + tau],
    # otherwise a nominal (0.0).
    anom_interval_min = my_centre - (tau)
    anom_interval_max = my_centre + (tau)

    stream_scores = new_unweighted_scores_fixed[:, 0]
    Y = ((anom_interval_min <= stream_scores) & (stream_scores <= anom_interval_max)).astype(float)

    validation_scores = new_unweighted_validation_scores[:, 0]
    Y_AUC = ((anom_interval_min <= validation_scores) & (validation_scores <= anom_interval_max)).astype(float)

    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)

    if n_old > 0:
        # These do not depend on the random mask, so compute them once.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # If u > 0 at least one label stays visible because of the ceiling.
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Redraw the mask until 2 labeled anomalies or 98 labeled nominals
        # remain. With the default u and n_old exactly 100 labels stay
        # visible, so the two conditions coincide.
        # NOTE(review): for other u / n_old the hard-coded 2 and 98 may make
        # this loop very long or endless - confirm before changing them.
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            # Hide the labels of n_hide randomly chosen initial points.
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            # Rows of X belonging to the anomalies whose label is still visible.
            X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]

        if curr_method == "RandomScore" and n_remaining_anomalies > 0:
            # Scores given by the LAST model to the visible anomalies.
            related_scores = [
                new_unweighted_scores_fixed[jj,n_models-1]
                for jj in range(n_old)
                if Y_muted[jj] == 1
            ]

    ###############################################################################
    # AAA method

    # Supervised method that will be applied.
    supervised_method = 'RandomForestClassifier'

    # Initialization.
    # NOTE(review): when n_old == 0 nothing defines X_lab, Y_lab or
    # all_labeled_scores before the batch loop uses them.
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  # Y_muted is used directly, as in all the other methods

        init_kwargs = dict(X_old=X_old, Y_old=Y_old, n_data_min=100, models=models, curr_method=curr_method)
        if curr_method == "RandomScore":
            # The random scores were computed above and are passed in as they are.
            init_kwargs["now_scores"] = new_unweighted_scores_fixed[:n_old,:]
        X_lab, Y_lab, all_labeled_scores = InitActiveAGG_2(**init_kwargs)

    # Left edge of the next batch inside X.
    curr_L_index = n_old

    # AUC per batch for the current trial.
    AAA_AUC = [0]*n_loops

    # Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        # Next batch of data and its true labels.
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)

        # Learn from the labeled data, propose new predicted anomalies, and
        # propose other data to label.
        step_kwargs = dict(
            X_new=X_new, X_old=X_old, X_lab=X_lab, Y_lab=Y_lab,
            all_labeled_scores=all_labeled_scores, models=models,
            supervised_method=supervised_method,
            n_data_min=100, n_data_max=B, min_n_labeled=5, n_send=n_send,
            pc_top=0.6, min_n_nom=5, min_n_anom=1, tau_exp=tau,
            curr_method=curr_method,
        )
        if curr_method == "RandomScore":
            step_kwargs["now_scores"] = new_unweighted_scores_fixed[curr_L_index:curr_R_index,:]
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG_2(**step_kwargs)

        # Play the expert: attach the true labels to the proposed items.
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1

        # Evaluate the current learned model on the external data. When no
        # model is returned the AUC is recorded as 0.5.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            batch_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            batch_auc = 0.5
        AAA_AUC[r] = batch_auc
        all_AAA_AUC[curr_trial,r] = batch_auc

        # Move on to the next batch.
        curr_L_index = curr_R_index

# Average over trials (one value per batch).
avg_AAA_RS1 = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_RS1 = np.mean(all_nY1_AAA, axis=0)

# Save these results:
np.savez("AUC_RS1_onlyAAA.npz",
         avg_AAA_RS1 = avg_AAA_RS1,
         avg_nY1_AAA_RS1 = avg_nY1_AAA_RS1,
        )

# Save also the predicted scores for later (for the plot below).
# NOTE(review): new_preds comes from the most recent batch in which a learned
# model was available; it is stale if the final batch had none, and undefined
# if no batch ever produced a model.
np.savez("scores_RS1_onlyAAA.npz",
         new_preds_RS1 = new_preds
        )

### The other methods

In [None]:
# Re-seed NumPy's global random generator with the same value that was set
# before the AAA cell, ahead of the runs of the other methods below.
np.random.seed(123456789)

In [None]:
# Result matrices have one row per trial. LODA and GLAD get one column per
# batch; Active-LODA gets half as many columns.
full_shape = (n_trials, n_loops)
half_shape = (n_trials, int(n_loops / 2))

# AUC per trial and batch.
all_LODA_AUC = np.zeros(full_shape)
all_active_LODA_AUC = np.zeros(half_shape)
all_GLAD_AUC = np.zeros(full_shape)

# nY1 values (number of labeled anomalies) per trial and batch.
all_nY1_LODA = np.zeros(full_shape)
all_nY1_active_LODA = np.zeros(half_shape)
all_nY1_GLAD = np.zeros(full_shape)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 
    
    # Set the location of the anomaly scores (out of 0.05, 0.25, 0.5, 0.75, 0.95 ?)
    my_centre = 0.5
    
    #Set tau back to 0 for each loop, since we don't want a mixture in the original data now:
    tau = 0
    
    # Specific arguments:
    a_list = [[0.5,0.5]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    
    #RESET TAU TO ITS TRUE VALUE:
    tau = 0.01
    
    curr_method = "RandomScore"
    
    # Build the models dictionary
    models = build_random_score_models(M=n_models)
    best_m = n_models
    
    new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))
        
    # Iterate through the models and get their scores
    for i, (name, model) in enumerate(models.items()):
        model.fit(X)
        y_score = model.score_samples(X)
        y_score.dtype = np.float64
        new_unweighted_scores_fixed[:, i] = y_score.squeeze()
    
    new_unweighted_validation_scores = np.zeros((X_AUC.shape[0], len(models)))
    
    # Do the same thing for the external data:
    for i, (name, model) in enumerate(models.items()):
        model.fit(X_AUC)
        y_score = model.score_samples(X_AUC)
        y_score.dtype = np.float64
        new_unweighted_validation_scores[:, i] = y_score.squeeze()
    
    
    # (Re)Define the labels Y based on some feature of the data. Here to begin with 
    # this will be some subinterval for the scores from the first model.
    
    Y = np.empty((np.shape(X)[0],))
    anom_interval_min = my_centre - (tau)
    anom_interval_max = my_centre +  (tau)
    for i in range(len(Y)):
        Y[i] = 1*(anom_interval_min <=  new_unweighted_scores_fixed[i,0] <= anom_interval_max) 
    
    Y_AUC = np.empty((np.shape(X_AUC)[0],))
    anom_interval_min = my_centre - (tau)
    anom_interval_max = my_centre +  (tau)
    for i in range(len(Y_AUC)):
        Y_AUC[i] = 1*(anom_interval_min <=  new_unweighted_validation_scores[i,0] <= anom_interval_max) 
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The n_anomalies indices in permute_indices will correspond to the anomalies:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]
    
        if curr_method == "RandomScore" and n_remaining_anomalies > 0:
            related_scores = []
            for jj in range(n_old):
                if Y_muted[jj] == 1:
                    related_scores = related_scores + [new_unweighted_scores_fixed[jj,n_models-1]]
    
    
    ###################################################################################
    #LODA
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
        
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):

        # Bounds of the r-th batch inside X, Y and the precomputed score matrix.
        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        if curr_method == "RandomScore":
            # The random scores were computed once for the whole stream, so
            # just take this batch's rows. The slice has to be taken before
            # curr_L_index is advanced: once it equals curr_R_index the slice
            # is empty, no item is sent to the expert and nY1 never grows.
            new_unweighted_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index ,:]
        else:
            # Refit every detector on the batch and store its scores column-wise.
            new_unweighted_scores = np.empty((B,best_m))
            for i, (name, model) in enumerate(models.items()):

                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()

        # Final LODA scores are averages over anomaly detectors.
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Send the top_k highest-scoring items of the batch to the expert and
        # record how many labeled anomalies there are so far.
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################

        #############################################################
        # AUC on the external validation data. weighted_scores is computed
        # once before this loop, so the value is the same for every batch.
        LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_scores)
        all_LODA_AUC[curr_trial,r] = LODA_AUC[r]

        fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        # Advance to the next batch only after every slice above was taken.
        curr_L_index = curr_R_index
    
    
    #############################################################################################
    #ACTIVE LODA

    #Every loop, the full labeling budget of n_send items goes to the highest-scoring items:
    top_k = n_send

    #C_tau has to exist before the optimization function is called. Das et al. (2016)
    #suggest 0.03 as a default; here it is tied to the mixture parameter tau instead.
    C_tau = tau

    #The projectors (models) and their number (best_m) are not re-selected here: the
    #LODA_Choose_M call that used to sit at this point is disabled, so the values
    #already in scope are reused.

    #Labeled anomalies (H_A) and labeled nominals (H_N) each get a store of unweighted
    #scores, one column per projector, starting with zero rows:
    H_N = np.empty((0, best_m))
    H_A = np.empty((0, best_m))

    #Start from equal weights that sum to 1:
    w_old = np.full(best_m, 1/best_m)

    #Flag saying whether H_A currently holds a "fake anomaly", i.e. a stand-in used by the
    #optimization while only nominals have been labeled. None has been needed so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. At the start it matters
    #whether there is initial "old" data and whether part of it is labeled: Y_muted is
    #compared against 1 (labeled anomaly) and 0 (labeled nominal), and NaN entries are
    #treated as unlabeled.

    #q_tau (the score quantile handed to the optimizer) is reset here so that a value
    #left over from a previous trial is never reused when the initialization below
    #does not set it.
    q_tau = None

    n_lab_nominals = int(np.sum(Y_muted == 0)) if n_old > 0 else 0
    n_lab_anomalies = int(np.sum(Y_muted == 1)) if n_old > 0 else 0

    if n_lab_nominals + n_lab_anomalies > 0:

        #Unweighted scores of every projector on the initial data. Both label
        #configurations below need them, so they are computed once, here.
        X_new = X[:n_old,:]

        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[:n_old,:]
        else:
            new_unweighted_scores = np.empty((n_old,best_m))
            for i, model in enumerate(models.values()):
                model.fit(X_new)
                #Convert (rather than reinterpret) the scores to float64:
                new_unweighted_scores[:,i] = np.asarray(model.score_samples(X_new), dtype=np.float64).squeeze()

        if n_lab_nominals > 0:
            #At least one labeled nominal (the case treated in Das et al. (2016)).

            #Linear combination of the projector scores with the current weights,
            #sorted from smallest to largest while keeping the indices:
            new_scores = np.matmul(new_unweighted_scores,w_old)
            sorted_indices = np.argsort(new_scores)
            sorted_scores = new_scores[sorted_indices]

            #Calculate q_tau on this initial data.
            #WARNING: active-LODA basically expects anomalies to have the highest scores
            #from the get go, but at this stage the anomalies could just as well have the
            #LOWEST scores, so this first q_tau is of limited meaning. Its minimization
            #step (L2 penalty on the weights) also implicitly expects positive weights,
            #which means that active-LODA as defined by Das et al. (2016) may not reach
            #its full potential here.
            my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
            q_tau = sorted_scores[my_quantile_sorted_index]

            H_N = new_unweighted_scores[(Y_muted==0),:]
            if n_lab_anomalies == 0:
                #No labeled anomaly: following Das et al. (2016), the point whose score
                #sits at the quantile position serves as a proxy anomaly.
                fake_anomaly = 1
                H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            else:
                H_A = new_unweighted_scores[(Y_muted==1),:]

            #Update the linear combination coefficients and rescale them to unit L2 norm:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            w_old = w_new / np.linalg.norm(w_new)

        else:
            #Labeled anomalies but no labeled nominals: Das et al. (2016) give no recipe
            #for this case. The weights stay equal; only H_A is initialized, from the
            #scores of the initial data computed above.
            H_A = new_unweighted_scores[(Y_muted==1),:]

    #At this point either nothing was labeled (w_old is still the equal-weight vector and
    #H_A, H_N are empty), or H_A / H_N were filled and w_old may have been updated.

    #We next move to the batch data.
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    which_lab = [i for i, y in enumerate(Y_muted) if not np.isnan(y)]
    Y_lab = [Y_muted[j] for j in which_lab]

    #Only the first int(n_loops/2) batches are processed by active-LODA.
    for r in range(int(n_loops/2)):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        n_new_true_anom = sum(Y_new==1)

        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index ,:]
        else:
            new_unweighted_scores = np.empty((B,best_m))
            for i, model in enumerate(models.values()):
                model.fit(X_new)
                new_unweighted_scores[:,i] = np.asarray(model.score_samples(X_new), dtype=np.float64).squeeze()

        #Scores of the current batch alone under the current weights (not used further
        #in this section; kept for inspection):
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #AUC on the external validation set with the current value of w_old:
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        validation_auc = roc_auc_score(Y_AUC,weighted_validation_scores)
        active_LODA_AUC[r] = validation_auc
        all_active_LODA_AUC[curr_trial,r] = validation_auc

        #Append the current H_A and H_N to the batch scores. In this batch framework the
        #number of data points is not fixed from start to finish as in Das et al. (2016);
        #without the labeled points the calculation of q_tau would drift over time.
        #The appended rows get indices B, B+1, ..., so they can be told apart from the batch.
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Sort from smallest to largest, whilst retaining the indices:
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)

        #If no quantile is available yet (no usable labeled initial data), fall back to
        #the quantile of the current scores instead of using an undefined value:
        if q_tau is None:
            q_tau = sorted_scores[my_quantile_sorted_index]

        #Das et al. (2016) send the single highest-scoring point to the expert; here the
        #top_k highest-scoring points OF THE BATCH (index < B) are sent, best first.
        #If top_k exceeds the batch size, the whole batch is sent.
        batch_ranked = sorted_indices[sorted_indices < B]
        top_k_indices = batch_ranked[::-1][:top_k].tolist()

        #Labels the expert returns for these points:
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1

        #While H_A only holds a proxy anomaly, the proxy is discarded every round. If the
        #expert just confirmed a real anomaly the flag is cleared for good; otherwise a
        #fresh proxy is chosen further down.
        if fake_anomaly == 1:
            H_A = np.empty((0, best_m))
            if sum(Y_expert==1) > 0:
                fake_anomaly = 0

        #Append the unweighted scores of the newly labeled points to H_A or H_N:
        for j, label in zip(top_k_indices, Y_expert):
            score_row = new_unweighted_scores[j:(j+1),:]
            if label == 1:
                H_A = np.concatenate([H_A,score_row])
            else:
                H_N = np.concatenate([H_N,score_row])

        #All labeled data so far may still be nominals. In that case the point at the
        #quantile position of the current scores serves as proxy anomaly, and the flag is
        #raised so that the proxy is not later mistaken for a real labeled anomaly.
        if np.shape(H_A)[0]==0:
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            fake_anomaly = 1

        #Update the linear combination coefficients. The optimizer receives the quantile
        #obtained before this round; q_tau is refreshed on the current data afterwards.
        w_new = optimize_w_2(H_A, H_N, q_tau)
        q_tau = sorted_scores[my_quantile_sorted_index]

        #New weights, rescaled to unit L2 norm:
        w_old = w_new / np.linalg.norm(w_new)

        #Predictions and labels of all labeled points under the new weights (not used
        #further in this section; kept for inspection):
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN

        #Update for the next loop:
        curr_L_index = curr_L_index + B
    
    ##########################################################################
    #GLAD

    #Parameters passed to the GLAD loss functions below:
    b = 0.5
    mylambda = 1
    #Number of items sent to the expert per batch:
    top_k = n_send
    #Sentinel meaning "no quantile computed yet"; the batch loop tests for this exact value.
    q_tau_tm1 = -10e7

    # Build the model (input width = number of columns of X, one output per projector):
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)

    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])

    model_GLAD.summary()

    # Pre-train the model so that every output is close to b.
    # The same n_min as for choosing the number of LODA projections is used. The target
    # is sized from the rows actually available, so that inputs and targets still match
    # when X holds fewer than n_min rows.
    X_pretrain = X[:n_min,:]
    y_true = np.full((np.shape(X_pretrain)[0], best_m), b)

    model_GLAD.fit(X_pretrain, y_true, epochs=10, batch_size=32)

    # Sanity check: the output for a data point should be very close to b in every component:
    row = X[1:2]
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Defaults for the edge cases (no initial data, or initial data without any label):
    X_lab = np.empty([0,np.shape(X)[1]])
    Y_lab = []
    all_labeled_scores = np.empty([0,best_m])
    all_unweighted_scores = np.empty([0,best_m])

    if n_old > 0:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i, y in enumerate(Y_old) if not np.isnan(y)]

        #Unweighted scores are needed for EVERY initial data point, labeled or not: the
        #batch loop pairs the rows of all_unweighted_scores with the rows of
        #X[:n_old + (r+1)*B], so the initial rows must be present even without labels.
        if curr_method == "RandomScore":
            all_unweighted_scores = new_unweighted_scores_fixed[:np.shape(X_old)[0],:]
        else:
            all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
            for i, model in enumerate(models.values()):
                model.fit(X_old)
                #Convert (rather than reinterpret) the scores to float64:
                all_unweighted_scores[:,i] = np.asarray(model.score_samples(X_old), dtype=np.float64).squeeze()

        if len(which_lab) > 0:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Left index of the next batch in X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)

        # Unweighted scores of every projector for X_new:
        if curr_method == "RandomScore":
            all_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index,:]
        else:
            all_scores = np.empty([np.shape(X_new)[0],best_m])
            for i, model in enumerate(models.values()):
                model.fit(X_new)
                #Convert (rather than reinterpret) the scores to float64:
                all_scores[:,i] = np.asarray(model.score_samples(X_new), dtype=np.float64).squeeze()

        #Update the scores of all data seen so far:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])

        # Pass X_new through the current state of the neural network to get per-point weights:
        curr_w = model_GLAD.predict(X_new)

        # Final score of a point = its projector scores weighted by its own weights:
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)

        # Sort these scores and keep the top_k highest (the slice start is derived from
        # the actual number of scores and clipped at 0):
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[max(len(sorted_indices)-top_k, 0):]

        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])

        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])

        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels

        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1

        #Score the external validation set with the current network (row-wise weighted
        #sum, computed in one vectorized step exactly like X_new_final_scores above):
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores * curr_w_ext, axis=1)

        validation_auc = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        GLAD_AUC[r] = validation_auc
        all_GLAD_AUC[curr_trial,r] = validation_auc

        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        #First time through (sentinel still in place): compute q_tau_tm1 with the
        #current network, before it is trained on labels.
        if q_tau_tm1 == -10e7:
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.sum(all_unweighted_scores * all_weights_so_far, axis=1)

            # Pick the quantile without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # Next, compile the model with the second custom loss function:
        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]

        # 2. Append four extra copies of every labeled anomaly to the data, the labeled
        #    scores and the data seen so far
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend the labels with four 1s for each anomaly
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])

        # The loss ignores its (y_true, y_pred) arguments and is evaluated on the
        # augmented labeled data captured here.
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        # Train the model for further iterations (10 epochs)
        model_GLAD.fit(X_lab, np.array(Y_lab), epochs=10, batch_size=32,verbose=0)

        #Refresh q_tau_tm1 with the re-trained network, for use in the next batch:
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.sum(all_unweighted_scores * all_weights_so_far, axis=1)

        # Pick the quantile without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]

        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average each AUC matrix over its first axis (indexed by curr_trial when filled),
# which leaves one value per batch
avg_LODA_RS1, avg_active_LODA_RS1, avg_GLAD_RS1 = (
    np.mean(per_trial_auc, axis=0)
    for per_trial_auc in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

# Same averaging for the cumulative numbers of labeled anomalies
avg_nY1_LODA_RS1, avg_nY1_active_LODA_RS1, avg_nY1_GLAD_RS1 = (
    np.mean(per_trial_count, axis=0)
    for per_trial_count in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)

# Store the averaged curves:
auc_summary_RS1 = {
    "avg_LODA_RS1": avg_LODA_RS1,
    "avg_active_LODA_RS1": avg_active_LODA_RS1,
    "avg_GLAD_RS1": avg_GLAD_RS1,
    "avg_nY1_LODA_RS1": avg_nY1_LODA_RS1,
    "avg_nY1_active_LODA_RS1": avg_nY1_active_LODA_RS1,
    "avg_nY1_GLAD_RS1": avg_nY1_GLAD_RS1,
}
np.savez("AUC_RS1.npz", **auc_summary_RS1)

# Store the most recent validation score vectors of the three methods as well
# (for the plot below)
score_vectors_RS1 = {
    "weighted_scores_RS1": weighted_scores,
    "weighted_validation_scores_RS1": weighted_validation_scores,
    "X_new_final_scores_ext_RS1": X_new_final_scores_ext,
}
np.savez("scores_RS1.npz", **score_vectors_RS1)

### Plot the AUCs

In [None]:
# Batch numbers on the x-axis; the Active-LODA curve only covers the first
# int(n_loops/2) batches.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

plt.figure(figsize=(10, 6))

# One curve per method: (x values, averaged AUC, legend label, marker).
# avg_AAA_RS1 is not computed in this part of the notebook; it is expected to
# already be in scope.
auc_curves = (
    (xx, avg_LODA_RS1, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_RS1, "Active-LODA", 's'),
    (xx, avg_GLAD_RS1, "GLAD", '^'),
    (xx, avg_AAA_RS1, "AAA", 'd'),
)
for curve_x, curve_y, curve_label, curve_marker in auc_curves:
    plt.plot(curve_x, curve_y, label=curve_label, marker=curve_marker)

# Axis labels and one tick per batch
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)

# Legend and grid
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

### Plot cumulative anomalies detected

In [None]:
# Batch numbers on the x-axis; the Active-LODA curve only covers the first
# int(n_loops/2) batches.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

# One curve per method: (x values, averaged count of labeled anomalies, legend label, marker).
# avg_nY1_AAA_RS1 is not computed in this part of the notebook; it is expected to
# already be in scope.
count_curves = (
    (xx, avg_nY1_LODA_RS1, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_RS1, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_RS1, "GLAD", '^'),
    (xx, avg_nY1_AAA_RS1, "AAA", 'd'),
)
for curve_x, curve_y, curve_label, curve_marker in count_curves:
    plt.plot(curve_x, curve_y, label=curve_label, marker=curve_marker)

# Axis labels and title
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")

plt.legend()

# Light dashed grid for readability
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## The random Gaussian score way I

Here we take each data point and randomly assign it to be a nominal or an anomaly (via the value of $\tau$). We then create ten anomaly detectors: nine of them output uniform random scores, whilst the scores of the tenth are generated from one of two different Gaussian distributions, depending on whether the point in question has been assigned to be an anomaly or a nominal. 

### Our method

In [None]:
# Seed NumPy's global random number generator before the trials below.
# NOTE(review): only NumPy's global generator is seeded here; whether the acanag
# helpers draw exclusively from it cannot be seen from this cell - confirm.
np.random.seed(123456789)

In [None]:
# Parameter pairs handed to build_custom_score_models for the nominal and the
# anomaly score distribution of the Gaussian score model.
# NOTE(review): presumably (mean, standard deviation or variance) - confirm in acanag.
nominal_params = (0, 1)
anomaly_params = (0, 0.01)

# One row per trial, one column per batch:
all_AAA_AUC = np.zeros((n_trials,n_loops))
all_nY1_AAA = np.zeros((n_trials, n_loops))  # Cumulative number of labeled anomalies

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials)     

    #Set tau to 0 while sampling, since we don't want a mixture in the original data now:
    tau = 0

    # Specific arguments:
    a_list = [[0.5,0.5]]
    anomaly_cov_list = [
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)

    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )

    # Large external data-set from the same distribution, used only to compute the AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )

    #RESET TAU TO ITS TRUE VALUE:
    tau = 0.01

    curr_method = "RandomScore"

    best_m = n_models

    # Build models
    models = build_custom_score_models(M=n_models, tau=tau, nominal_params=nominal_params, anomaly_params=anomaly_params)

    # Fixed score matrix for X, one column per model. The Gaussian mixture score model
    # also returns the labels, which REPLACE Y.
    new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))
    for i, model in enumerate(models.values()):
        model.fit(X)
        if isinstance(model, MixtureGaussianScoreModel):
            new_unweighted_scores_fixed[:, i], Y = model.score_samples(X)
        else:
            new_unweighted_scores_fixed[:, i] = model.score_samples(X)

    # Same for the external validation set (its labels replace Y_AUC):
    new_unweighted_validation_scores = np.zeros((X_AUC.shape[0], len(models)))
    for i, model in enumerate(models.values()):
        model.fit(X_AUC)
        if isinstance(model, MixtureGaussianScoreModel):
            new_unweighted_validation_scores[:, i], Y_AUC = model.score_samples(X_AUC)
        else:
            new_unweighted_validation_scores[:, i] = model.score_samples(X_AUC)

    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)

    if n_old > 0:
        #Quantities that do not change between masking attempts:
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        #How many labels to hide:
        n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
        n_hide_pc = (n_hide/n_old)*100

        #Redraw the mask until 2 labeled anomalies or 98 labeled nominals remain.
        #NOTE(review): the loop stops as soon as EITHER count hits its target; the two
        #conditions only coincide when exactly 100 labels are shown - confirm the intent.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN = label hidden):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')

        #Values of X corresponding to the unmuted anomalies of the accepted mask
        #(not used further in this cell; kept for inspection):
        X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]

        #Scores the last model gives to the unmuted anomalies (kept for inspection):
        if curr_method == "RandomScore" and n_remaining_anomalies > 0:
            related_scores = [new_unweighted_scores_fixed[jj,n_models-1] for jj in range(n_old) if Y_muted[jj] == 1]

    ###############################################################################
    # AAA method

    #Choose a supervised method that will be applied:
    supervised_method = 'RandomForestClassifier'

    #Initialization

    #Dealing with edge cases:
    if n_old == 0:
        #NOTE(review): X_lab, Y_lab and all_labeled_scores are not initialized on this
        #path although the batch loop below reads them - confirm how ActiveAGG_2 is
        #meant to be started without initial data.
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods

        # Run the initialization function InitActiveAGG_2. The pre-computed scores are
        # only passed along in the "RandomScore" setting.
        init_kwargs = dict(X_old = X_old, Y_old = Y_old, n_data_min = 100, models=models, curr_method=curr_method)
        if curr_method == "RandomScore":
            init_kwargs["now_scores"] = new_unweighted_scores_fixed[:n_old,:]
        X_lab, Y_lab, all_labeled_scores = InitActiveAGG_2(**init_kwargs)

    #Left index of the next batch in X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)

        # Learn from labeled data, propose new predicted anomalies, and propose other
        # data to label. As above, pre-computed scores are only passed for "RandomScore".
        step_kwargs = dict(
            X_new = X_new, X_old = X_old, X_lab = X_lab, Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores, models=models,
            supervised_method = supervised_method, n_data_min = 100, n_data_max = B,
            min_n_labeled = 5, n_send=n_send, pc_top = 0.6, min_n_nom=5, min_n_anom=1,
            tau_exp=tau, curr_method = curr_method,
        )
        if curr_method == "RandomScore":
            step_kwargs["now_scores"] = new_unweighted_scores_fixed[curr_L_index:curr_R_index,:]
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG_2(**step_kwargs)

        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)

        all_nY1_AAA[curr_trial, r] = nY1

        #Test the current learned model on the external data in order to calculate the
        #AUC; while no model has been learned yet, 0.5 is recorded instead.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            batch_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            batch_auc = 0.5
        AAA_AUC[r] = batch_auc
        all_AAA_AUC[curr_trial,r] = batch_auc

        #Update indices:
        curr_L_index = curr_L_index + B
        

# Average over the first axis (indexed by curr_trial when filled), which leaves
# one value per batch
avg_AAA_RS2, avg_nY1_AAA_RS2 = (
    np.mean(per_trial_values, axis=0)
    for per_trial_values in (all_AAA_AUC, all_nY1_AAA)
)

# Store the averaged curves:
auc_summary_RS2 = {
    "avg_AAA_RS2": avg_AAA_RS2,
    "avg_nY1_AAA_RS2": avg_nY1_AAA_RS2,
}
np.savez("AUC_RS2_onlyAAA.npz", **auc_summary_RS2)

# Store the most recent validation predictions as well (for the plot below).
# new_preds only exists if a model was learned in at least one batch.
score_vectors_RS2 = {"new_preds_RS2": new_preds}
np.savez("scores_RS2_onlyAAA.npz", **score_vectors_RS2)

### The other methods

In [None]:
# Seed NumPy's global random generator so that the NumPy-based draws in the cells below
# are repeatable from run to run.
# NOTE(review): this seeds NumPy only; if the GLAD network uses another framework's RNG
# for initialisation/training, that part is not fixed by this call - confirm.
np.random.seed(123456789)

In [None]:
# Parameter tuples handed to build_custom_score_models for the nominal / anomaly scores
nominal_params = (0, 1)
anomaly_params = (0, 0.01)

# AUC per (trial, batch). Active-LODA is only run for the first half of the batches.
all_LODA_AUC = np.zeros(shape=(n_trials, n_loops))
all_active_LODA_AUC = np.zeros(shape=(n_trials, int(n_loops/2)))
all_GLAD_AUC = np.zeros(shape=(n_trials, n_loops))

# Cumulative number of labeled anomalies per (trial, batch); same shapes as the AUC arrays
all_nY1_LODA = np.zeros_like(all_LODA_AUC)
all_nY1_active_LODA = np.zeros_like(all_active_LODA_AUC)
all_nY1_GLAD = np.zeros_like(all_GLAD_AUC)

for curr_trial in range(n_trials):
    # Each trial re-samples the data, rebuilds and re-scores the detectors, re-masks the
    # initial labels, and then runs LODA, active-LODA and GLAD on the same data.
    print('We are in trial',curr_trial+1,'out of',n_trials) 
    
    #Set tau back to 0 for each loop, since we don't want a mixture in the original data now:
    tau = 0
    
    # Specific arguments:
    a_list = [[0.5,0.5]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    #RESET TAU TO ITS TRUE VALUE:
    tau = 0.01
    
    curr_method = "RandomScore"
    
    # All n_models score models are used (no selection of the number of detectors here):
    best_m = n_models
    
    # Build models
    models = build_custom_score_models(M=n_models, tau=tau, nominal_params=nominal_params, anomaly_params=anomaly_params)
    
    # One row per data point of X, one column per score model:
    new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))
    
    # Generate scores
    # For a MixtureGaussianScoreModel, score_samples returns a pair; its second element
    # REPLACES Y, so from here on Y holds the labels produced by that model rather than
    # the ones returned by sample_data.
    i = -1
    for name, model in models.items():
        i = i + 1
        model.fit(X)
        if isinstance(model, MixtureGaussianScoreModel):
            new_unweighted_scores_fixed[:, i], Y = model.score_samples(X)
        else:
            new_unweighted_scores_fixed[:, i] = model.score_samples(X)
    
    
    # Same layout for the external validation set:
    new_unweighted_validation_scores = np.zeros((X_AUC.shape[0], len(models)))
    
    # Generate scores
    # Note that the same model objects are re-fitted here on X_AUC, and Y_AUC is likewise
    # replaced by the labels returned by the mixture model.
    i = -1
    for name, model in models.items():
        i = i + 1
        model.fit(X_AUC)
        if isinstance(model, MixtureGaussianScoreModel):
            new_unweighted_validation_scores[:, i], Y_AUC = model.score_samples(X_AUC)
        else:
            new_unweighted_validation_scores[:, i] = model.score_samples(X_AUC)
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        # Start values chosen so that the loop body runs at least once:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        # Re-draw the random mask until the visible labels hit a target count.
        # NOTE(review): with `&` the loop repeats only while BOTH counts miss their target,
        # i.e. it stops as soon as there are exactly 2 visible anomalies OR exactly 98
        # visible nominals - confirm that this (rather than requiring both) is intended.
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide (u is the fraction shown).
            #NOTE(review): the ceiling is applied to the number HIDDEN, so a very small u > 0
            #can still result in every label being hidden.
            n_hide = int(np.ceil((1-u)*n_old))
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the points whose labels are hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (float copy so that hidden labels can be stored as NaN):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]
    
        # Scores given by the LAST score model (column n_models-1) to the visible anomalies:
        if curr_method == "RandomScore" and n_remaining_anomalies > 0:
            related_scores = []
            for jj in range(n_old):
                if Y_muted[jj] == 1:
                    related_scores = related_scores + [new_unweighted_scores_fixed[jj,n_models-1]]
    
    ###################################################################################
    #LODA
    #
    # Plain LODA baseline: all detectors are weighted equally and nothing is learned from
    # the expert. The labels of the n_send highest-scoring points of every batch are still
    # collected, so that the cumulative number of anomalies found can be compared with the
    # other methods.
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
        
    # Validation score of a point = unweighted mean of the detectors' scores:
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    # Start from the labels left visible in the initial ("old") data:
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    # The weights never change, so the validation AUC is identical for every batch.
    # Compute it (and the ROC curve) once instead of twice per iteration.
    LODA_validation_AUC = roc_auc_score(Y_AUC,weighted_scores)
    fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
    roc_auc = auc(fpr, tpr)
    
    for r in range(n_loops):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        if curr_method == "RandomScore":
            # Pre-computed scores of the current batch. curr_L_index must still point at
            # the START of the batch here: advancing it before this slice (as was done
            # previously) makes the slice empty, so no point is ever sent to the expert.
            new_unweighted_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index ,:]
        else:
            new_unweighted_scores = np.empty((B,best_m))
            for i, (name, model) in enumerate(models.items()):
                
                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()
    
        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Send the top_k highest-scoring points of the batch to the expert and record
        # the cumulative number of labeled anomalies:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################

        
        #############################################################
        LODA_AUC[r] = LODA_validation_AUC
        all_LODA_AUC[curr_trial,r] = LODA_validation_AUC

        #Update indices only once the batch has been fully processed:
        curr_L_index = curr_R_index
    
    
    #############################################################################################
    #ACTIVE LODA
    #
    # Initialisation of active-LODA (Das et al. 2016): set equal weights, then, if labeled
    # initial data exist, fit a first set of weights w_old from them.
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016). Here it is set to
    #the mixture parameter tau instead.
    C_tau = tau 
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send
    
    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.array([1/best_m for i in range(best_m)])
    
    #We also initialize arrays to put the unweighted scores of labeled data into
    #(H_A: labeled anomalies, H_N: labeled nominals; one column per detector):
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    
    #If there were initial data and at least one labeled nominal (following Das et al. (2016))
    if n_old > 0 and np.sum(Y_muted == 0) > 0:
            
        #Extract the initial data from X:
        X_new = X[:n_old,:]
    
        #Calculate the unweighted scores for each LODA projector on the initial data: 
        new_unweighted_scores = np.empty((n_old,best_m))
    
        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[:n_old,:]
        else:
            for i, (name, model) in enumerate(models.items()):
        
                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()
        
        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)
    
        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]
    
        #Calculate q_tau on this initial data: 
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first loop,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on 
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal. 
        #The index is floor(n_old*(1-C_tau)), shifted down by one unless C_tau == 1:
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]
    
        #Create the arrays H_A and H_N:
        #First we take H_A. In Das et al. (2016) they allow for the case that there
        #are no labeled anomalies, only labeled nominals.
        if np.sum(Y_muted == 1) == 0:
            #Set a "fake anomaly alert to 1":
            fake_anomaly = 1
            #We take the data point whose score sits at the quantile position computed above
            #(my_quantile_sorted_index) as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            H_N = new_unweighted_scores[(Y_muted==0),:]
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)

        else:
            #There is at least one labeled anomaly:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            H_N = new_unweighted_scores[(Y_muted==0),:]
     
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            
            #Weighted scores and labels of the labeled points under the new weights.
            #These temp_* values are not read again in this part of the script.
            temp_unweighted = np.concatenate([H_A,H_N])
            temp_pred = np.matmul(temp_unweighted,w_new)
            temp_YA = [1]*np.shape(H_A)[0]
            temp_YN = [0]*np.shape(H_N)[0]
            temp_Y = temp_YA + temp_YN
            
        #We can now update w_old with the value of w_new, rescaled to unit Euclidean norm.
        #If this whole `if` branch wasn't run, then w_old stays at its original value.
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)

    #Else if there were initial data but no labeled nominals: Das et al. (2016)
    #provide no details for what to do here. It is possible though unlikely that
    #in the initial data, there were no labeled nominals but there was at least
    #one labeled anomaly. We do have to check this, since we will need to initialize
    #H_A in this case, even if H_N is empty.
    #NOTE(review): on this path (and when n_old == 0) q_tau is never assigned here, yet the
    #batch loop below passes q_tau to optimize_w_2 in its first iteration - confirm that a
    #value is available.
    elif n_old > 0 and np.sum(Y_muted == 0) == 0:
        if np.sum(Y_muted == 1) > 0:
            #initialize H_A:
            #NOTE(review): new_unweighted_scores is whatever the LODA section left behind
            #(batch-sized), while the mask Y_muted==1 has n_old entries, so this indexing
            #looks like it can fail before the RandomScore override below is reached -
            #confirm.
            H_A = new_unweighted_scores[(Y_muted==1),:]
    
            if curr_method == "RandomScore":
                H_A = new_unweighted_scores_fixed[:n_old,:]
                H_A = H_A[(Y_muted==1),:]
            #Potential Python issue whereby if there is just one anomaly, weird
            #things happen with array shapes. To guard against this:
            if np.sum(Y_muted == 1) == 1:
                if np.shape(H_A) != (1,best_m):
                    H_A = H_A.reshape(1,-1)
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    #Each iteration: score the batch with the current weights, record the validation AUC,
    #query the expert on the top_k highest-scoring batch points, grow H_A / H_N, and re-fit
    #the weights.
    
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    #Start from the labels left visible in the initial data:
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    #Active-LODA is only run on the first half of the batches:
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = sum(Y_new==1)
        
        new_unweighted_scores = np.empty((B,best_m))
    
        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index ,:]
        else:
            for i, (name, model) in enumerate(models.items()):
                
                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()

        #Temporary final active-LODA scores are a linear combination over anomaly detectors:
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        #Calculate the weighted scores on the external validation set with the current
        #value of w_old (i.e. BEFORE this batch's labels are used):
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
        
        #Append the scores of all previously labeled points after the B batch rows:
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        temp_Y = Y_new.tolist() + [1]*np.shape(H_A)[0] + [0]*np.shape(H_N)[0]
        
        #Following the methodology in Das et al. (2016), we should provide the 
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we shall instead provide the top_k scoring data
        #points to the expert, where top_k has been pre-defined.
        
        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #Remember that these may include items from the previous loop or from the initialization.
        #However, since we appended those on to the end, we know their indices will be B, B+1,...
        #so we will be able to look out for them.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]
    
        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)
        
        #We now have to go through the sorted_indices from the end back towards the 
        #beginning until we manage to gather top_k indices which are less than or equal to B-1
        #(i.e. indices of points belonging to the current batch).
        top_k_indices = []
        n_indices_so_far = 0
        curr_index = len(sorted_indices)
        while n_indices_so_far < top_k:
            curr_index = curr_index - 1
            next_top_index = sorted_indices[curr_index]
            if next_top_index < B:
                top_k_indices.append(next_top_index)
                n_indices_so_far = n_indices_so_far + 1
                
        #Now we go and get the labels in Y_new associated with these indices:
        #top_k_indices = np.array(top_k_indices)
        Y_expert = Y_new[top_k_indices]

        #Record the cumulative number of labeled anomalies after this batch:
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1
        
        #Now is a good time to check whether we just found at least one real anomaly while
        #up to now we only had one fake anomaly:
        if sum(Y_expert==1) > 0 and fake_anomaly==1:
            #We now reset H_A to be empty:
            H_A = np.empty((0, best_m))
            #And we set fake_anomaly to 0 forever:
            fake_anomaly = 0
            
        if sum(Y_expert==1) == 0 and fake_anomaly==1:
            #We reset H_A back to empty again:
            H_A = np.empty((0, best_m))
                
        #We then need to append the relevant unweighted scores to the current H_A and H_N.
        #The indices are all < B, so they address batch rows of the concatenated array.
        for j in range(top_k):
            if Y_expert[j]==1:
                H_A = np.concatenate([H_A,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
            else:
                H_N = np.concatenate([H_N,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
                
        #Here we have to check whether this was the first time H_A actually contained
        #a real anomaly or not, rather than a temporary one (if there were no labeled anomalies)    
        
        #Now since top_k > 0 we know that there is a positive number of labeled data for sure.
        #We always have to retain however the possibility that all of the labeled data so far
        #are nominals. 
        #NOTE(review): both branches below are identical - when H_A is empty no proxy
        #("fake") anomaly is actually inserted before the optimisation; confirm this is intended.
        if np.shape(H_A)[0]==0:
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
        else:
            w_new = optimize_w_2(H_A, H_N, q_tau)
    
        #Update q_tau on this data. Because this happens AFTER the optimisation above, each
        #call to optimize_w_2 sees the q_tau computed in the previous iteration (or in the
        #initialisation).
        q_tau = sorted_scores[my_quantile_sorted_index]
        
        #update w_old (rescaled to unit Euclidean norm)
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN
    
        #Update for the next loop:
        curr_L_index = curr_L_index + B
            
    ##########################################################################
    #GLAD
    #
    # Initialisation of GLAD (Islam et al. 2018): build the network that outputs one weight
    # per detector for each data point, pre-train it to output b everywhere, and collect the
    # initially labeled data and their detector scores.
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    #Sentinel meaning "quantile not computed yet"; tested for equality in the batch loop:
    q_tau_tm1 = -10e7
    
    # Build the model (input size = number of columns of X, output size = best_m):
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    model_GLAD.summary()
    
    # Train the model
    # The target is the constant b for every detector, fitted on the first n_min rows of X:
    y_true = np.full((n_min, best_m), b)
    
    model_GLAD.fit(X[:n_min,:], y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b
    # (computed for the single row X[1]; the result is not inspected further here):
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Dealing with edge cases:
    if n_old == 0:
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])
    
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]
    
        if len(which_lab) == 0:
            #NOTE(review): all_unweighted_scores is left with 0 rows here although X_old has
            #n_old rows; the batch loop later indexes it row-for-row against X[:n_old + ...],
            #so the two look misaligned in this edge case - confirm.
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = [] 
            all_unweighted_scores = np.empty([0,best_m])
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
    
            if curr_method == "RandomScore":
                all_unweighted_scores = new_unweighted_scores_fixed[:np.shape(X_old)[0],:]
            else:
                for i, (name, model) in enumerate(models.items()):
                    model.fit(X_old)
                    y_score = model.score_samples(X_old)
                    y_score.dtype = np.float64
                    all_unweighted_scores[:,i] = y_score.squeeze()
    
    
            #Detector scores of the initially labeled points only:
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #Each iteration: score the batch with the network's current per-point weights, query the
    #expert on the top_k points, record the validation AUC, then re-train the network on all
    #labeled data and refresh the quantile q_tau_tm1.
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new:
        all_scores = np.empty([np.shape(X_new)[0],best_m])
    
        if curr_method == "RandomScore":
                all_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index,:]
        else:
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                all_scores[:,i] = y_score.squeeze()
    
        #Update the running matrix of detector scores for all data seen so far:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score of each point = sum over detectors of (score * that point's weight):
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)
        
        # Sort these scores:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far, including the current batch):
        X_so_far = X[:(n_old + (r+1)*B),:]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        # Validation AUC with the network as it stands BEFORE this batch's re-training:
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.zeros((np.shape(X_AUC)[0],))
        for k in range(np.shape(X_AUC)[0]):
            X_new_final_scores_ext[k] = np.matmul(new_unweighted_validation_scores[k,:],np.transpose(curr_w_ext[k,:]))
        
        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
    
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        #Check if this is the first time through (q_tau_tm1 still holds its initial value -10e7):
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
            for k in range(np.shape(X_so_far)[0]):
                all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]
    
        # Next, compile the model with the second custom loss function:
        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (4 copies for each anomaly)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])
        
        # The loss lambda does not use its y_true / y_pred arguments; everything it needs is
        # passed to new_custom_loss_2 directly.
        #model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss(X_lab, Y_lab, q_tau_tm1, all_labeled_scores,model_GLAD, X_so_far, mylambda, b), metrics=['accuracy'])
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])
    
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)
    
        #Dealing with q_tau_tm1: recompute it with the re-trained network for the next batch
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.zeros((np.shape(X_so_far)[0],))
        for k in range(np.shape(X_so_far)[0]):
            all_final_scores_so_far[k] = np.matmul(all_unweighted_scores[k,:],np.transpose(all_weights_so_far[k,:]))

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
          
        #Update indices:
        curr_L_index = curr_L_index + B

# Average every metric over trials (axis 0), leaving one value per batch
avg_LODA_RS2 = all_LODA_AUC.mean(axis=0)
avg_active_LODA_RS2 = all_active_LODA_AUC.mean(axis=0)
avg_GLAD_RS2 = all_GLAD_AUC.mean(axis=0)

avg_nY1_LODA_RS2 = all_nY1_LODA.mean(axis=0)
avg_nY1_active_LODA_RS2 = all_nY1_active_LODA.mean(axis=0)
avg_nY1_GLAD_RS2 = all_nY1_GLAD.mean(axis=0)

# Persist the averaged curves
np.savez(
    "AUC_RS2.npz",
    **{
        "avg_LODA_RS2": avg_LODA_RS2,
        "avg_active_LODA_RS2": avg_active_LODA_RS2,
        "avg_GLAD_RS2": avg_GLAD_RS2,
        "avg_nY1_LODA_RS2": avg_nY1_LODA_RS2,
        "avg_nY1_active_LODA_RS2": avg_nY1_active_LODA_RS2,
        "avg_nY1_GLAD_RS2": avg_nY1_GLAD_RS2,
    },
)

# Persist the validation scores from the last trial too (used by a later plot)
np.savez(
    "scores_RS2.npz",
    **{
        "weighted_scores_RS2": weighted_scores,
        "weighted_validation_scores_RS2": weighted_validation_scores,
        "X_new_final_scores_ext_RS2": X_new_final_scores_ext,
    },
)

### Plot the AUCs

In [None]:
# Batch numbers on the x-axis. Active-LODA is only run for the first half of
# the batches, so it gets a shorter axis.
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops / 2) + 1))

plt.figure(figsize=(10, 6))

# One (x values, y values, legend label, marker) entry per method, in drawing order.
auc_curves = [
    (xx, avg_LODA_RS2, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_RS2, "Active-LODA", 's'),
    (xx, avg_GLAD_RS2, "GLAD", '^'),
    (xx, avg_AAA_RS2, "AAA", 'd'),
]
for batch_numbers, mean_auc, method_label, marker_style in auc_curves:
    plt.plot(batch_numbers, mean_auc, label=method_label, marker=marker_style)

# Axis labels and one tick per batch
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)

# Legend and grid
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

### Plot the cumulative anomalies detected

In [None]:
# Batch numbers on the x-axis (Active-LODA only covers the first half of the batches).
xx = list(range(1, n_loops + 1))
xxactiveLODA = list(range(1, int(n_loops / 2) + 1))

# One (x values, y values, legend label, marker) entry per method, in drawing order.
detected_curves = [
    (xx, avg_nY1_LODA_RS2, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_RS2, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_RS2, "GLAD", '^'),
    (xx, avg_nY1_AAA_RS2, "AAA", 'd'),
]
for batch_numbers, mean_detected, method_label, marker_style in detected_curves:
    plt.plot(batch_numbers, mean_detected, label=method_label, marker=marker_style)

# Axis labels and title
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")

# Legend and a light dashed grid
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## The random Gaussian score way II

Here we take each data point and randomly assign it to be either a nominal or an anomaly (via the value of $\tau$). We then create ten anomaly detectors, nine of which have uniform random score values, whilst the scores of the tenth are generated from different Gaussian distributions depending on whether the relevant point has been assigned to be an anomaly or a nominal.

### Our method

In [None]:
# Fix the seed of NumPy's global random number generator so that the trials below are reproducible.
np.random.seed(123456789)

In [None]:
# Gaussian parameters passed to build_custom_score_models below.
# NOTE(review): presumably (mean, spread) of the informative detector's scores for
# nominals and anomalies respectively -- confirm against the acanag library.
nominal_params = (0, 1)
anomaly_params = (2.5, 0.01)

# Result matrices: one row per trial, one column per batch.
all_AAA_AUC = np.zeros((n_trials,n_loops))   # AUC on the external validation set
all_nY1_AAA = np.zeros((n_trials, n_loops))  # Number of labeled anomalies held after each batch

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 
    
    #Set tau back to 0 for each loop, since we don't want a mixture in the original data now
    #(the nominal/anomaly labels are produced further down by the mixture score model):
    tau = 0
    
    # Specific arguments:
    a_list = [[0.5,0.5]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    #RESET TAU TO ITS TRUE VALUE:
    tau = 0.01
    
    curr_method = "RandomScore"
    
    best_m = n_models
    
    # Build models
    models = build_custom_score_models(M=n_models, tau=tau, nominal_params=nominal_params, anomaly_params=anomaly_params)
    
    # Generate the scores of every detector on the experiment data (one column per detector).
    # The mixture model also returns the labels it assigned, and these REPLACE Y.
    new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))
    for i, (name, model) in enumerate(models.items()):
        model.fit(X)
        if isinstance(model, MixtureGaussianScoreModel):
            new_unweighted_scores_fixed[:, i], Y = model.score_samples(X)
        else:
            new_unweighted_scores_fixed[:, i] = model.score_samples(X)
    
    # Same for the external validation set (Y_AUC is likewise replaced):
    new_unweighted_validation_scores = np.zeros((X_AUC.shape[0], len(models)))
    for i, (name, model) in enumerate(models.items()):
        model.fit(X_AUC)
        if isinstance(model, MixtureGaussianScoreModel):
            new_unweighted_validation_scores[:, i], Y_AUC = model.score_samples(X_AUC)
        else:
            new_unweighted_validation_scores[:, i] = model.score_samples(X_AUC)
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        #Redraw the random mask until the target counts of visible labels are reached.
        #NOTE(review): as written the loop stops as soon as EITHER count hits its target
        #(2 anomalies or 98 nominals); confirm that "both" was not intended.
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old))
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask: the first n_hide
            #entries of a random permutation are the ones whose label is hidden.
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
        if curr_method == "RandomScore" and n_remaining_anomalies > 0:
            #Scores given by the last detector to the visible anomalies:
            related_scores = [new_unweighted_scores_fixed[jj,n_models-1] for jj in range(n_old) if Y_muted[jj] == 1]
    
    ###############################################################################
    # AAA method

    #Choose a supervised method that will be applied:
    supervised_method = 'RandomForestClassifier'
    
    #Initialization
    
    #Dealing with edge cases:
    #NOTE(review): when n_old == 0, X_lab, Y_lab and all_labeled_scores are never
    #initialized although the batch loop below needs them.
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods

        # Run the initialization function InitActiveAGG_2. The precomputed scores are
        # only handed over for the "RandomScore" method.
        init_kwargs = dict(X_old = X_old, Y_old = Y_old, n_data_min = 100, models=models, curr_method=curr_method)
        if curr_method == "RandomScore":
            init_kwargs["now_scores"] = new_unweighted_scores_fixed[:n_old,:]
        X_lab, Y_lab, all_labeled_scores = InitActiveAGG_2(**init_kwargs)
    
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Learn from labeled data, propose new predicted anomalies, and propose other data to label.
        # As above, the precomputed scores of the batch are only passed for "RandomScore".
        step_kwargs = dict(
            X_new = X_new, X_old = X_old, X_lab = X_lab, Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores, models=models,
            supervised_method = supervised_method, n_data_min = 100, n_data_max = B,
            min_n_labeled = 5, n_send=n_send, pc_top = 0.6, min_n_nom=5, min_n_anom=1,
            tau_exp=tau, curr_method = curr_method,
        )
        if curr_method == "RandomScore":
            step_kwargs["now_scores"] = new_unweighted_scores_fixed[curr_L_index:curr_R_index,:]
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG_2(**step_kwargs)
        
        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)

        all_nY1_AAA[curr_trial, r] = nY1
        
        #Test the current learned model on the external data in order to calculate the AUC.
        #Without a learned model we record the chance level 0.5.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            curr_AAA_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            curr_AAA_auc = 0.5
        AAA_AUC[r] = curr_AAA_auc
        all_AAA_AUC[curr_trial,r] = curr_AAA_auc

        #Update indices:
        curr_L_index = curr_L_index + B
        

# Average over trials (axis 0): one value per batch
avg_AAA_RS2_2p5 = all_AAA_AUC.mean(axis=0)
avg_nY1_AAA_RS2_2p5 = all_nY1_AAA.mean(axis=0)

# Store the averaged curves; the archive keys are the variable names.
auc_archive_RS2_2p5 = {
    "avg_AAA_RS2_2p5": avg_AAA_RS2_2p5,
    "avg_nY1_AAA_RS2_2p5": avg_nY1_AAA_RS2_2p5,
}
np.savez("AUC_RS2_2p5_onlyAAA.npz", **auc_archive_RS2_2p5)

# Store the most recently computed validation predictions of the AAA model
# (new_preds is only refreshed in batches where a learned model exists).
np.savez("scores_RS2_2p5_onlyAAA.npz", **{"new_preds_RS2_2p5": new_preds})

### The other methods

In [None]:
# Fix the seed of NumPy's global random number generator for reproducibility.
# The value is the same as the one used in the "Our method" cell of this section.
np.random.seed(123456789)

In [None]:
# Gaussian parameters passed to build_custom_score_models in the trial loop
anomaly_params = (2.5, 0.01)
nominal_params = (0, 1)

# LODA and GLAD are evaluated on every batch, active-LODA only on the first
# int(n_loops/2) batches, hence the two different result shapes (trials x batches).
all_batches_shape = (n_trials, n_loops)
half_batches_shape = (n_trials, int(n_loops/2))

# Validation AUC per trial and batch
all_LODA_AUC = np.zeros(all_batches_shape)
all_active_LODA_AUC = np.zeros(half_batches_shape)
all_GLAD_AUC = np.zeros(all_batches_shape)

# nY1 values (count of labeled anomalies) per trial and batch
all_nY1_LODA = np.zeros(all_batches_shape)
all_nY1_active_LODA = np.zeros(half_batches_shape)
all_nY1_GLAD = np.zeros(all_batches_shape)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 
      
    #Set tau back to 0 for each loop, since we don't want a mixture in the original data now
    #(the nominal/anomaly labels are produced further down by the mixture score model):
    tau = 0
    
    # Specific arguments:
    a_list = [[0.5,0.5]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    #RESET TAU TO ITS TRUE VALUE:
    tau = 0.01
    
    curr_method = "RandomScore"
    
    best_m = n_models
    
    # Build models
    models = build_custom_score_models(M=n_models, tau=tau, nominal_params=nominal_params, anomaly_params=anomaly_params)
    
    # Generate the scores of every detector on the experiment data (one column per detector).
    # The mixture model also returns the labels it assigned, and these REPLACE Y.
    new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))
    for i, (name, model) in enumerate(models.items()):
        model.fit(X)
        if isinstance(model, MixtureGaussianScoreModel):
            new_unweighted_scores_fixed[:, i], Y = model.score_samples(X)
        else:
            new_unweighted_scores_fixed[:, i] = model.score_samples(X)
    
    # Same for the external validation set (Y_AUC is likewise replaced):
    new_unweighted_validation_scores = np.zeros((X_AUC.shape[0], len(models)))
    for i, (name, model) in enumerate(models.items()):
        model.fit(X_AUC)
        if isinstance(model, MixtureGaussianScoreModel):
            new_unweighted_validation_scores[:, i], Y_AUC = model.score_samples(X_AUC)
        else:
            new_unweighted_validation_scores[:, i] = model.score_samples(X_AUC)
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        #Redraw the random mask until the target counts of visible labels are reached.
        #NOTE(review): as written the loop stops as soon as EITHER count hits its target
        #(2 anomalies or 98 nominals); confirm that "both" was not intended.
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old))
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask: the first n_hide
            #entries of a random permutation are the ones whose label is hidden.
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]
    
        if curr_method == "RandomScore" and n_remaining_anomalies > 0:
            #Scores given by the last detector to the visible anomalies:
            related_scores = [new_unweighted_scores_fixed[jj,n_models-1] for jj in range(n_old) if Y_muted[jj] == 1]
    
    ###################################################################################
    #LODA
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
        
    #Plain LODA: the aggregated score is the unweighted mean over all detectors. It does
    #not depend on the labels collected below, so the validation AUC and ROC curve are the
    #same for every batch and are computed once here.
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)
    loda_validation_auc = roc_auc_score(Y_AUC,weighted_scores)
    fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
    roc_auc = auc(fpr, tpr)

    #Labels already visible in the initial data (the non-NaN entries of Y_muted):
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        #Unweighted scores of the current batch, one column per detector.
        #NOTE: curr_L_index must still point at the START of the batch here. It used to be
        #advanced before this slice, which made the slice empty, so that no item was ever
        #sent to the expert and the anomaly count never grew.
        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index ,:]
        else:
            new_unweighted_scores = np.empty((B,best_m))
            for i, (name, model) in enumerate(models.items()):
                
                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()

        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Sort these scores and send the top_k highest-scoring items of the batch to the
        # expert; then record how many labeled anomalies we hold so far.
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################

        #############################################################
        LODA_AUC[r] = loda_validation_auc
        all_LODA_AUC[curr_trial,r] = loda_validation_auc

        #Update indices for the next batch (only once this batch is fully processed):
        curr_L_index = curr_L_index + B
    
    
    #############################################################################################
    #ACTIVE LODA
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau 
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send
    
    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.full(best_m, 1/best_m)
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    
    #NOTE(review): when n_old == 0 nothing below runs and q_tau stays undefined, although
    #the batch loop that follows passes q_tau to optimize_w_2.
    if n_old > 0:
            
        #Extract the initial data from X:
        X_new = X[:n_old,:]
    
        #Calculate the unweighted scores for each LODA projector on the initial data.
        #This is done for every n_old > 0 case: the labeled-anomalies-only branch below
        #used to index whatever scores were left over from the previous section, whose
        #row count does not match Y_muted.
        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[:n_old,:]
        else:
            new_unweighted_scores = np.empty((n_old,best_m))
            for i, (name, model) in enumerate(models.items()):
        
                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()
    
        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)
    
        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]
    
        #Calculate q_tau on this initial data: 
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first loop,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on 
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal. 
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]
    
        #Boolean masks over the initial data (hidden labels are NaN and match neither):
        is_labeled_anomaly = (Y_muted == 1)
        is_labeled_nominal = (Y_muted == 0)
    
        #If there is at least one labeled nominal (following Das et al. (2016)):
        if np.sum(is_labeled_nominal) > 0:
    
            #Create the arrays H_A and H_N:
            #First we take H_A. In Das et al. (2016) they allow for the case that there
            #are no labeled anomalies, only labeled nominals.
            if np.sum(is_labeled_anomaly) == 0:
                #Set a "fake anomaly alert" to 1:
                fake_anomaly = 1
                #We take the data point corresponding to the score sorted into the 
                #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
                H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            else:
                #There is at least one labeled anomaly:
                H_A = new_unweighted_scores[is_labeled_anomaly,:]
            H_N = new_unweighted_scores[is_labeled_nominal,:]
    
            #Update the linear combination coefficients and rescale them to unit L2 norm:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            w_old = w_new
            w_old = w_old / np.linalg.norm(w_old)
    
        #Else there are no labeled nominals: Das et al. (2016) provide no details for what
        #to do here. It is possible though unlikely that in the initial data, there were no
        #labeled nominals but there was at least one labeled anomaly. We do have to check
        #this, since we will need to initialize H_A in this case, even if H_N is empty.
        #w_old keeps its equal weights.
        elif np.sum(is_labeled_anomaly) > 0:
            H_A = new_unweighted_scores[is_labeled_anomaly,:]
            #Potential Python issue whereby if there is just one anomaly, weird
            #things happen with array shapes. To guard against this:
            if np.sum(is_labeled_anomaly) == 1:
                if np.shape(H_A) != (1,best_m):
                    H_A = H_A.reshape(1,-1)
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    
    #Active-LODA over the batches: for each batch, score it with the current weights w_old,
    #record the validation AUC, send the top_k highest-scoring batch items to the expert,
    #add their unweighted scores to H_A (anomalies) / H_N (nominals) and re-optimize the
    #weights with optimize_w_2.
    curr_L_index = n_old
    #NOTE(review): this list has n_loops entries but the loop below only fills the first
    #int(n_loops/2) of them.
    active_LODA_AUC = [0]*n_loops

    #Labels already visible in the initial data (the non-NaN entries of Y_muted):
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    
    #Only the first half of the batches is processed with active-LODA:
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = sum(Y_new==1)
        
        #Unweighted scores of the current batch, one column per detector:
        new_unweighted_scores = np.empty((B,best_m))
    
        if curr_method == "RandomScore":
            #The scores were generated once up front; just slice out this batch.
            new_unweighted_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index ,:]
        else:
            for i, (name, model) in enumerate(models.items()):
                
                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()
    
        #Temporary final active-LODA scores are a linear combination over anomaly detectors:
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Calculated the weighted scores on the external validation set with the current
        #value of w_old (i.e. BEFORE this batch's labels are used to update the weights):
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
        
        #We actually have to, at this point, attach the current versions of H_A
        #and H_N to new_unweighted_scores, since in this batch framework, we do
        #not have a fixed number of data points from the start to the finish, like
        #they do in Das et al. (2016). If we do not do this, it will affect the
        #calculation of q_tau over time (a kind of bias will be introduced, maybe
        #not the end of the world, but still.)
        
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        #Labels aligned with the rows of the extended score matrix (this value is
        #overwritten at the end of the iteration):
        temp_Y = Y_new.tolist() + [1]*np.shape(H_A)[0] + [0]*np.shape(H_N)[0]
        
        #Following the methodology in Das et al. (2016), we should provide the 
        #highest scoring data-point to an expert for labeling. In order to be
        #slightly more general, we shall instead provide the top_k scoring data
        #points to the expert, where top_k has been pre-defined.
        
        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #Remember that these may include items from the previous loop or from the initialization.
        #However, since we appended those on to the end, we know their indices will be B, B+1,...
        #so we will be able to look out for them.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        #Position of the (1 - C_tau) quantile within the sorted scores:
        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)
        
        #We now have to go through the sorted_indices from the end back towards the 
        #beginning until we manage to gather top_k indices which are less than or equal to B-1.
        top_k_indices = []
        n_indices_so_far = 0
        curr_index = len(sorted_indices)
        while n_indices_so_far < top_k:
            curr_index = curr_index - 1
            next_top_index = sorted_indices[curr_index]
            #print('next top index:',next_top_index)
            if next_top_index < B:
                top_k_indices.append(next_top_index)
                n_indices_so_far = n_indices_so_far + 1
                
        #Now we go and get the labels in Y_new associated with these indices:
        #top_k_indices = np.array(top_k_indices)
        Y_expert = Y_new[top_k_indices]

        #Add the expert's labels to the running list and record the number of labeled
        #anomalies held after this batch:
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1
        
        #Now is a good time to check whether we just found at least one real anomaly while
        #up to now we only had one fake anomaly:
        if sum(Y_expert==1) > 0 and fake_anomaly==1:
            #We now reset H_A to be empty:
            H_A = np.empty((0, best_m))
            #And we set fake_anomaly to 0 forever:
            fake_anomaly = 0
            
        if sum(Y_expert==1) == 0 and fake_anomaly==1:
            #We reset H_A back to empty again (a fresh proxy is chosen below):
            H_A = np.empty((0, best_m))
                
        #We then need to append the relevant unweighted scores to the current H_A and H_N
        for j in range(top_k):
            if Y_expert[j]==1:
                H_A = np.concatenate([H_A,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
            else:
                H_N = np.concatenate([H_N,new_unweighted_scores[top_k_indices[j]:(top_k_indices[j]+1),:]])
                
        #Here we have to check whether this was the first time H_A actually contained
        #a real anomaly or not, rather than a temporary one (if there were no labeled anomalies)    
        
        #Now since top_k > 0 we know that there is a positive number of labeled data for sure.
        #We always have to retain however the possibility that all of the labeled data so far
        #are nominals. 
        if np.shape(H_A)[0]==0:
            #We take the data point corresponding to the score sorted into the 
            #quantile position computed above as a proxy for an anomaly.
            #NOTE(review): fake_anomaly is not set to 1 here. If it was 0 on entry, the
            #proxy row stays in H_A in later batches as if it were a real anomaly --
            #confirm whether that is intended.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
        else:
            w_new = optimize_w_2(H_A, H_N, q_tau)

        #Update q_tau on this data.
        #NOTE(review): the optimization just above therefore used the q_tau of the PREVIOUS
        #step (or of the initialization); q_tau must already exist when the first batch is
        #processed.
        q_tau = sorted_scores[my_quantile_sorted_index]
        
        #update w_old (rescaled to unit L2 norm)
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        #Weighted scores and labels of all labeled items so far (not used further in this loop):
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN

        #Update for the next loop:
        curr_L_index = curr_L_index + B
    
    ##########################################################################
    #GLAD
    
    #Initialize some parameters:
    # b is the constant target value the network is pre-trained to output (see y_true below);
    # b and mylambda are also passed to both custom loss functions.
    b = 0.5
    mylambda = 1
    # Number of items sent to the expert per batch:
    top_k = n_send
    # Sentinel meaning "no quantile computed yet"; the batch loop compares q_tau_tm1 against
    # this exact value (-10e7, i.e. -1e8) to detect its first pass.
    q_tau_tm1 = -10e7
    
    # Build the model:
    # (arguments: the number of columns of X and the number of detectors best_m)
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    model_GLAD.summary()
    
    # Train the model
    # We use the same n_min used earlier to find the number of LODA projections:
    # The target is an (n_min, best_m) array filled with b.
    # NOTE(review): this assumes X has at least n_min rows, otherwise the inputs and
    # the targets have different lengths -- confirm.
    y_true = np.full((n_min, best_m), b)
    
    model_GLAD.fit(X[:n_min,:], y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b:
    # (the prediction for the second row of X is stored in `output`; it is not
    # inspected anywhere in this section)
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Builds, from the initial ("old") data, the four objects the GLAD batch loop extends:
    #  X_lab, Y_lab          - the labeled data points and their labels,
    #  all_labeled_scores    - the unweighted detector scores of the labeled points,
    #  all_unweighted_scores - the unweighted detector scores of ALL data seen so far.
    #The batch loop pairs all_unweighted_scores row-for-row with X[:n_old + (r+1)*B], so
    #it must contain one row per old data point whether or not any old label is visible.
    #(Previously it was left empty when n_old > 0 but no label was visible, which made
    #the row-wise scoring in the batch loop index out of range.)
    if n_old == 0:
        #No initial data at all: everything starts empty.
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])

    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods

        #Indices of the old points whose label is visible (i.e., not NaN):
        which_lab = [i for i in range(len(Y_old)) if not np.isnan(Y_old[i])]

        #Score every old point with every detector, independently of the labels:
        if curr_method == "RandomScore":
            all_unweighted_scores = new_unweighted_scores_fixed[:np.shape(X_old)[0],:]
        else:
            all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_old)
                y_score = model.score_samples(X_old)
                y_score.dtype = np.float64
                all_unweighted_scores[:,i] = y_score.squeeze()

        if len(which_lab) == 0:
            #Old data exists but none of it is labeled:
            X_lab = np.empty([0,np.shape(X)[1]])
            Y_lab = []
            all_labeled_scores = np.empty([0,best_m])
        else:
            X_lab = X_old[which_lab,:]
            Y_lab = [Y_old[j] for j in which_lab]
            all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #For each batch: score it with the detectors, weight the scores with the current
    #network output, send the top_k highest-scoring points to the expert, record the
    #validation AUC, then retrain the network on all labeled data and refresh q_tau_tm1.
    #All per-row weighted scores are computed as row-wise sums of elementwise products
    #(the same form used for X_new_final_scores), instead of one Python-level matmul
    #per row. NOTE: results may differ from the per-row loop in the last floating-point
    #digit only.

    #Deal with indices of X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)

        # Calculate all of the (unweighted) scores for X_new, one column per detector:
        if curr_method == "RandomScore":
            all_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index,:]
        else:
            all_scores = np.empty([np.shape(X_new)[0],best_m])
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                all_scores[:,i] = y_score.squeeze()

        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])

        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        # Final score of each point = sum over detectors of (score * weight):
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)

        # Sort these scores (ascending) and keep the top_k largest:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]

        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])

        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])

        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels

        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1

        # Weighted scores of the external validation set under the current network:
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores * curr_w_ext, axis=1)

        # Validation AUC for this batch (computed once, stored in both places):
        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = GLAD_AUC[r]

        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)

        #Check if this is the first time through (q_tau_tm1 still holds its sentinel value):
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1: weighted scores of all data seen so far
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.sum(all_unweighted_scores * all_weights_so_far, axis=1)

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]

        # Next, compile the model with the second custom loss function:
        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]

        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 extra copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (matching the repeated rows)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])

        # The loss ignores its (y_true, y_pred) arguments and works from the captured arrays:
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)

        #Dealing with q_tau_tm1: recompute it with the freshly trained network
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.sum(all_unweighted_scores * all_weights_so_far, axis=1)

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]


        #Update indices:
        curr_L_index = curr_L_index + B
    
# Average every per-trial result table over the trials (axis 0), giving one value per batch.
avg_LODA_RS2_2p5 = np.mean(all_LODA_AUC, axis=0)
avg_active_LODA_RS2_2p5 = np.mean(all_active_LODA_AUC, axis=0)
avg_GLAD_RS2_2p5 = np.mean(all_GLAD_AUC, axis=0)

avg_nY1_LODA_RS2_2p5 = np.mean(all_nY1_LODA, axis=0)
avg_nY1_active_LODA_RS2_2p5 = np.mean(all_nY1_active_LODA, axis=0)
avg_nY1_GLAD_RS2_2p5 = np.mean(all_nY1_GLAD, axis=0)

# Write the averaged curves to disk; each dictionary key becomes the array's name in the archive.
auc_summary_RS2_2p5 = {
    "avg_LODA_RS2_2p5": avg_LODA_RS2_2p5,
    "avg_active_LODA_RS2_2p5": avg_active_LODA_RS2_2p5,
    "avg_GLAD_RS2_2p5": avg_GLAD_RS2_2p5,
    "avg_nY1_LODA_RS2_2p5": avg_nY1_LODA_RS2_2p5,
    "avg_nY1_active_LODA_RS2_2p5": avg_nY1_active_LODA_RS2_2p5,
    "avg_nY1_GLAD_RS2_2p5": avg_nY1_GLAD_RS2_2p5,
}
np.savez("AUC_RS2_2p5.npz", **auc_summary_RS2_2p5)

# Also keep the score vectors left over from the last trial (used for a later plot).
score_snapshot_RS2_2p5 = {
    "weighted_scores_RS2_2p5": weighted_scores,
    "weighted_validation_scores_RS2_2p5": weighted_validation_scores,
    "X_new_final_scores_ext_RS2_2p5": X_new_final_scores_ext,
}
np.savez("scores_RS2_2p5.npz", **score_snapshot_RS2_2p5)

### Plot the AUCs

In [None]:
# Batch numbers on the x-axis: all batches, and the first half only for Active-LODA
# (whose averaged curve is plotted against xxactiveLODA).
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

plt.figure(figsize=(10, 6))

# One (x, y, legend label, marker) entry per method, drawn in this order.
auc_curves = [
    (xx, avg_LODA_RS2_2p5, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_RS2_2p5, "Active-LODA", 's'),
    (xx, avg_GLAD_RS2_2p5, "GLAD", '^'),
    (xx, avg_AAA_RS2_2p5, "AAA", 'd'),
]
for curve_x, curve_y, curve_label, curve_marker in auc_curves:
    plt.plot(curve_x, curve_y, label=curve_label, marker=curve_marker)

# Axis labels, one tick per batch, legend and grid
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

### Plot the cumulative anomalies detected

In [None]:
# Batch numbers on the x-axis: all batches, and the first half only for Active-LODA.
xx = list(range(1,n_loops+1))
xxactiveLODA = list(range(1,int(n_loops/2)+1))

# One (x, y, legend label, marker) entry per method, drawn in this order.
anomaly_count_curves = [
    (xx, avg_nY1_LODA_RS2_2p5, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_RS2_2p5, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_RS2_2p5, "GLAD", '^'),
    (xx, avg_nY1_AAA_RS2_2p5, "AAA", 'd'),
]
for curve_x, curve_y, curve_label, curve_marker in anomaly_count_curves:
    plt.plot(curve_x, curve_y, label=curve_label, marker=curve_marker)

# Axis labels, title, legend and a light dashed grid
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## The random Gaussian score way III

Here we randomly assign each data point to be either a nominal or an anomaly (according to the mixture parameter $\tau$). We then create ten anomaly detectors: nine of them output uniform random scores, whilst the scores of the tenth are drawn from different Gaussian distributions depending on whether the point in question was assigned to be an anomaly or a nominal.

### Our method

In [None]:
# Seed NumPy's global random generator so that the np.random draws in the next cell
# (e.g., the random label-masking permutation) are repeatable.
np.random.seed(123456789)

In [None]:
# Per-trial (rows) and per-batch (columns) results of the AAA method:
# validation AUC and cumulative number of labeled anomalies (nY1).
all_AAA_AUC = np.zeros((n_trials,n_loops))
all_nY1_AAA = np.zeros((n_trials, n_loops))

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials)

    #Set tau back to 0 for each loop, since we don't want a mixture in the original data now:
    tau = 0

    # Specific arguments:
    a_list = [[0.5,0.5]]
    anomaly_cov_list = [
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)

    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )

    # Massive external data-set from the same distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                            # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                             # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )


    #RESET TAU TO ITS TRUE VALUE:
    tau = 0.01

    curr_method = "RandomScore"

    nominal_params = (0, 1)
    anomaly_params_list = [(-2.5, 0.01), (2.5, 0.01)]  # Two anomaly distributions
    best_m = n_models

    # Build models
    models = build_custom_score_models2(
        M=n_models,
        tau=tau,
        nominal_params=nominal_params,
        anomaly_params_list=anomaly_params_list
    )

    # Generate scores for training data (one column per model).
    # An ExtendedMixtureGaussianScoreModel returns a pair from score_samples: the
    # scores AND a second array that REPLACES the labels Y sampled above.
    new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))
    for i, (name, model) in enumerate(models.items()):
        model.fit(X)
        if isinstance(model, ExtendedMixtureGaussianScoreModel):
            new_unweighted_scores_fixed[:, i], Y = model.score_samples(X)
        else:
            new_unweighted_scores_fixed[:, i] = model.score_samples(X)

    # Generate scores for validation data (same scheme; Y_AUC is replaced likewise)
    new_unweighted_validation_scores = np.zeros((X_AUC.shape[0], len(models)))
    for i, (name, model) in enumerate(models.items()):
        model.fit(X_AUC)
        if isinstance(model, ExtendedMixtureGaussianScoreModel):
            new_unweighted_validation_scores[:, i], Y_AUC = model.score_samples(X_AUC)
        else:
            new_unweighted_validation_scores[:, i] = model.score_samples(X_AUC)


    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)

    #Slightly modified below to ensure that there is one anomaly on each side, rather than 2 in total only:
    if n_old > 0:
        #Quantities that do not change between masking attempts:
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        #How many labels to hide (rounded up), as a count and as a percentage:
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100
        #Scores of the old data under the last model (column n_models-1):
        last_model_old_scores = new_unweighted_scores_fixed[:n_old,n_models-1]

        #Start from values that force at least one masking attempt:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        n_with_anomalies_L = 0
        n_with_anomalies_R = 0

        #Redraw the random mask until exactly 2 anomalies and 98 nominals stay labeled,
        #with one labeled anomaly having a negative last-model score and one a positive one.
        #NOTE(review): this can only terminate if exactly 100 labels stay visible
        #(n_old - n_hide == 100) and the old data contains an anomaly on each side --
        #confirm that u and n_old are chosen accordingly.
        while (n_remaining_anomalies != 2) or (n_remaining_nominals != 98) or (n_with_anomalies_L != 1) or (n_with_anomalies_R != 1):
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')

            #Last-model scores of the anomalies whose label is still visible, and how
            #many of them are negative ("left") / positive ("right"); both counts are 0
            #when no labeled anomaly remains:
            score_with_anomalies = last_model_old_scores[Y_muted == 1]
            n_with_anomalies_L = np.sum(score_with_anomalies < 0)
            n_with_anomalies_R = np.sum(score_with_anomalies > 0)

        if curr_method == "RandomScore" and n_remaining_anomalies > 0:
            #Last-model scores of the labeled anomalies, as a plain list:
            related_scores = list(last_model_old_scores[Y_muted == 1])

    ###############################################################################
    # AAA method

    #Choose a supervised method that will be applied:
    supervised_method = 'RandomForestClassifier'

    #Initialization

    #Dealing with edge cases:
    if n_old == 0:
        #NOTE(review): X_lab, Y_lab and all_labeled_scores are not initialised on this
        #path although the batch loop below reads them -- confirm n_old == 0 is never used here.
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  #We directly use Y_muted, as in all the other methods

        # Run the initialization function InitActiveAGG_2; the precomputed scores are
        # only handed over in the "RandomScore" setting:
        init_kwargs = dict(X_old = X_old, Y_old = Y_old, n_data_min = 100, models=models, curr_method=curr_method)
        if curr_method == "RandomScore":
            init_kwargs["now_scores"] = new_unweighted_scores_fixed[:n_old,:]
        X_lab, Y_lab, all_labeled_scores = InitActiveAGG_2(**init_kwargs)


    #Deal with indices of X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    AAA_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)

        # Learn from labeled data, propose new predicted anomalies, and propose other data to label.
        # As above, the precomputed scores of this batch are only passed for "RandomScore":
        agg_kwargs = dict(
            X_new = X_new, X_old = X_old, X_lab = X_lab, Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores, models=models,
            supervised_method = supervised_method,
            n_data_min = 100, n_data_max = B, min_n_labeled = 5,
            n_send=n_send, pc_top = 0.6, min_n_nom=5, min_n_anom=1,
            tau_exp=tau, curr_method = curr_method,
        )
        if curr_method == "RandomScore":
            agg_kwargs["now_scores"] = new_unweighted_scores_fixed[curr_L_index:curr_R_index,:]
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG_2(**agg_kwargs)

        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)

        all_nY1_AAA[curr_trial, r] = nY1

        #Test the current learned model on the external data in order to calculate the AUC.
        #When no model was learned, the AUC is recorded as 0.5:
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            AAA_AUC[r] = roc_auc_score(Y_AUC,new_preds)
        else:
            AAA_AUC[r] = 0.5
        all_AAA_AUC[curr_trial,r] = AAA_AUC[r]

        #Update indices:
        curr_L_index = curr_L_index + B
        

# Average the AAA result tables over the trials (axis 0), giving one value per batch.
avg_AAA_RS3 = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_RS3 = np.mean(all_nY1_AAA, axis=0)

# Write the averaged curves to disk; each dictionary key becomes the array's name in the archive.
aaa_summary_RS3 = {
    "avg_AAA_RS3": avg_AAA_RS3,
    "avg_nY1_AAA_RS3": avg_nY1_AAA_RS3,
}
np.savez("AUC_RS3_onlyAAA.npz", **aaa_summary_RS3)

# Also keep the most recently computed validation predictions (used for a later plot).
np.savez("scores_RS3_onlyAAA.npz", new_preds_RS3=new_preds)


### The other three methods

In [None]:
# Seed NumPy's global random generator (same seed as for the AAA run above) so that
# the np.random draws in the next cell are repeatable.
np.random.seed(123456789)

In [None]:
# Result tables: one row per trial, one column per batch.
# The Active-LODA tables only have int(n_loops/2) columns.
per_batch_shape = (n_trials, n_loops)
active_LODA_shape = (n_trials, int(n_loops/2))

# Validation AUC of each method
all_LODA_AUC = np.zeros(per_batch_shape)
all_active_LODA_AUC = np.zeros(active_LODA_shape)
all_GLAD_AUC = np.zeros(per_batch_shape)

# Cumulative number of labeled anomalies (nY1) of each method
all_nY1_LODA = np.zeros(per_batch_shape)
all_nY1_active_LODA = np.zeros(active_LODA_shape)
all_nY1_GLAD = np.zeros(per_batch_shape)

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 

    #Set tau back to 0 for each loop, since we don't want a mixture in the original data now:
    tau = 0
    
    # Specific arguments:
    a_list = [[0.5,0.5]]
    anomaly_cov_list = [ 
        [[0.1,0],[0,0.1]]
    ]
    nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
    nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
    L = len(nominal_mean)
    
    # Sampling
    X, Y = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=n_old,                                           # Initial number of data points
        B=B,                                                   # Batch size
        n_loops=n_loops,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    # Massive external data-set from the same distribution to look at AUC
    X_AUC, Y_AUC = sample_data(
        multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
        n_old=5000,                                           # Initial number of data points
        B=0,                                                   # Batch size
        n_loops=0,                                       # Number of batches
        tau=tau,                                               # Fraction of anomalies
        a_list=a_list,                                         # Anomaly means
        anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
        nominal_mean=nominal_mean,                             # Nominal mean
        nominal_cov=nominal_cov,                               # Nominal covariance matrix
        L=L                                                    # Dimensionality
    )
    
    
    #RESET TAU TO ITS TRUE VALUE:
    tau = 0.01
    
    curr_method = "RandomScore"
    
    nominal_params = (0, 1)
    anomaly_params_list = [(-2.5, 0.01), (2.5, 0.01)]  # Two anomaly distributions
    best_m = n_models
    
    # Build models
    models = build_custom_score_models2(
        M=n_models, 
        tau=tau, 
        nominal_params=nominal_params, 
        anomaly_params_list=anomaly_params_list
    )
    
    # Generate scores for training data
    new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))
    
    i = -1
    for name, model in models.items():
        i += 1
        model.fit(X)
        if isinstance(model, ExtendedMixtureGaussianScoreModel):
            new_unweighted_scores_fixed[:, i], Y = model.score_samples(X)
        else:
            new_unweighted_scores_fixed[:, i] = model.score_samples(X)
    
    # Generate scores for validation data
    new_unweighted_validation_scores = np.zeros((X_AUC.shape[0], len(models)))
    
    i = -1
    for name, model in models.items():
        i += 1
        model.fit(X_AUC)
        if isinstance(model, ExtendedMixtureGaussianScoreModel):
            new_unweighted_validation_scores[:, i], Y_AUC = model.score_samples(X_AUC)
        else:
            new_unweighted_validation_scores[:, i] = model.score_samples(X_AUC)
    
    
    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)

    #Slightly modified below to ensure that there is one anomaly on each side, rather than 2 in total only:
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        n_with_anomalies_L = 0
        n_with_anomalies_R = 0
        while (n_remaining_anomalies != 2) or (n_remaining_nominals != 98) or (n_with_anomalies_L != 1) or (n_with_anomalies_R != 1):
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The n_anomalies indices in permute_indices will correspond to the anomalies:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            
            
            #Get the score value corresponding to the unmuted anomalies:
            score_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    score_with_anomalies.append(new_unweighted_scores_fixed[i,n_models-1])  # Use append instead of concatenation for efficiency

            if len(score_with_anomalies) > 0:
                # Flatten the list of arrays into a single array
                score_with_anomalies_array = np.vstack(score_with_anomalies)
            
                # Count negatives and positives across all elements
                n_with_anomalies_L = np.sum(score_with_anomalies_array < 0)
                n_with_anomalies_R = np.sum(score_with_anomalies_array > 0)
            else:
                n_with_anomalies_L = 0
                n_with_anomalies_R = 0
    
        if curr_method == "RandomScore" and n_remaining_anomalies > 0:
            related_scores = []
            for jj in range(n_old):
                if Y_muted[jj] == 1:
                    related_scores = related_scores + [new_unweighted_scores_fixed[jj,n_models-1]]
    
    ###################################################################################
    #LODA
    
    #Plain LODA: the final score of a point is the unweighted mean of its detector scores.
    #For every batch, the top_k highest-scoring points are sent to the expert and the
    #cumulative number of labeled anomalies is recorded.
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops

    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    #Start from the labels that are visible in the initial data (NaN = hidden):
    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]

    #The validation scores never change inside the loop, so neither does the AUC:
    #compute it once and store the same value for every batch.
    LODA_validation_AUC = roc_auc_score(Y_AUC,weighted_scores)
    fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
    # Calculate AUC
    roc_auc = auc(fpr, tpr)

    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]

        #Unweighted detector scores of this batch. The precomputed scores must be
        #sliced BEFORE curr_L_index is advanced: previously the index was advanced
        #first, so this slice was always empty and no item ever reached the expert.
        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index ,:]
        else:
            new_unweighted_scores = np.empty((B,best_m))
            for i, (name, model) in enumerate(models.items()):

                model.fit(X_new)
                y_score = model.score_samples(X_new)
                y_score.dtype = np.float64
                new_unweighted_scores[:,i] = y_score.squeeze()

        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Sort these scores (ascending) and send the top_k largest to the expert:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################


        #############################################################
        LODA_AUC[r] = LODA_validation_AUC
        all_LODA_AUC[curr_trial,r] = LODA_validation_AUC

        #Move on to the next batch:
        curr_L_index = curr_R_index
    
    
    #############################################################################################
    #ACTIVE LODA
    #
    #Active-learning variant of LODA: the unweighted scores of the best_m projectors are
    #combined linearly with a weight vector (w_old), which is re-optimized with optimize_w_2
    #after every batch from the labeled anomalies (H_A) and labeled nominals (H_N).
    #Writes all_active_LODA_AUC[curr_trial, r] and all_nY1_active_LODA[curr_trial, r].
    
    #We have to provide C_tau here since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016); here it is tau.
    C_tau = tau
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the
    #items with highest predicted scores:
    top_k = n_send
    
    #The LODA projectors (models) and their number (best_m) are taken as already defined.
    #The call that would recompute them is kept here for reference only:
    #models, best_m, scores = LODA_Choose_M(X[:min(n_min,n_old+B*n_loops),:],M_max=M_max,tau_M=tau_M)
    
    #We initialize the vector of weights as being equal and summing to 1:
    w_old = np.full(best_m, 1/best_m)
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, best_m))
    H_N = np.empty((0, best_m))
    
    #fake_anomaly == 1 means that H_A currently holds a single proxy ("fake") anomaly,
    #added only because no real labeled anomaly is available yet.
    fake_anomaly = 0
    
    #q_tau is reset for every trial. It stays None until a quantile has been computed, so
    #that a value left over from a previous trial can never leak into this one.
    q_tau = None
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled (Y_muted holds the known labels, NaN otherwise).
    n_lab_nominals = np.sum(Y_muted == 0) if n_old > 0 else 0
    n_lab_anomalies = np.sum(Y_muted == 1) if n_old > 0 else 0
    
    if n_lab_nominals > 0 or n_lab_anomalies > 0:
    
        #Extract the initial data from X:
        X_new = X[:n_old,:]
    
        #Calculate the unweighted scores for each LODA projector on the initial data.
        #This is done for BOTH labeled cases below, so that H_A is never built from
        #scores left over from an earlier section of the script.
        new_unweighted_scores = np.empty((n_old,best_m))
    
        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[:n_old,:]
        else:
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_new)
                #Convert the scores to float64 (assigning to .dtype would reinterpret
                #the raw bytes instead of converting the values):
                y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
                new_unweighted_scores[:,i] = y_score.squeeze()
    
        if n_lab_nominals > 0:
            #There is at least one labeled nominal (the case treated in Das et al. (2016)).
    
            #Calculate the linear combination of these scores weighted by w_old:
            new_scores = np.matmul(new_unweighted_scores,w_old)
    
            #Sort new_scores from smallest to largest, whilst retaining the indices.
            sorted_indices = np.argsort(new_scores)
            sorted_scores = new_scores[sorted_indices]
    
            #Calculate q_tau on this initial data:
            #WARNING: one of the underlying problems with active-LODA is that it basically expects
            #anomalies to have the highest scores from the get go. But here, in this first loop,
            #it very well could be that the anomalies have all the LOWEST scores. The calculation
            #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
            #expects positive weights, especially in its minimization step, with its L2 norm penalty
            #on the weights. This means that active-LODA does not reach its true potential, as
            #defined and coded by Das et al. (2016). The trouble is that without a penalty on
            #making the weights not too big, not too small, and not necessarily positive, it will
            #remain suboptimal.
            my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
            q_tau = sorted_scores[my_quantile_sorted_index]
    
            #Create the arrays H_A and H_N. In Das et al. (2016) they allow for the case
            #that there are no labeled anomalies, only labeled nominals.
            H_N = new_unweighted_scores[(Y_muted==0),:]
            if n_lab_anomalies == 0:
                #Set the "fake anomaly" alert to 1 and take the data point whose score
                #sits at the quantile position as a proxy for an anomaly.
                fake_anomaly = 1
                H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            else:
                #There is at least one labeled anomaly:
                H_A = new_unweighted_scores[(Y_muted==1),:]
    
            #Update the linear combination coefficients and normalize them:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            w_old = w_new / np.linalg.norm(w_new)
    
        else:
            #Initial data with labeled anomalies but no labeled nominals: Das et al. (2016)
            #provide no details for what to do here. We only initialize H_A; H_N stays
            #empty and w_old keeps its equal weights. The reshape guarantees a 2-D array
            #even when there is a single anomaly.
            H_A = new_unweighted_scores[(Y_muted==1),:].reshape(-1, best_m)
    
    #So, at this point, either we had no (labeled) old data and w_old is still equal weights,
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may
    #have been added to (or not), while w_old may or may not have already been
    #updated.
    
    #We next move to the batch data.
    
    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops
    
    #Labels known so far (non-NaN entries of Y_muted):
    which_lab = [i for i, y_known in enumerate(Y_muted) if not np.isnan(y_known)]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    #NOTE: active-LODA is only run for the first int(n_loops/2) batches.
    for r in range(int(n_loops/2)):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        n_new_true_anom = np.sum(Y_new==1)
        
        #Unweighted scores of every projector on the new batch:
        new_unweighted_scores = np.empty((B,best_m))
    
        if curr_method == "RandomScore":
            new_unweighted_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index ,:]
        else:
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_new)
                y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
                new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Temporary final active-LODA scores (batch rows only):
        temp_new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        #Calculate the weighted scores on the external validation set with the current
        #value of w_old, and store the resulting AUC:
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores,w_old)
        curr_active_AUC = roc_auc_score(Y_AUC,weighted_validation_scores)
        active_LODA_AUC[r] = curr_active_AUC
        all_active_LODA_AUC[curr_trial,r] = curr_active_AUC
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_validation_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
        
        #We have to, at this point, attach the current versions of H_A and H_N to
        #new_unweighted_scores, since in this batch framework, we do not have a fixed
        #number of data points from the start to the finish, like they do in
        #Das et al. (2016). If we do not do this, it will affect the calculation of
        #q_tau over time (a kind of bias will be introduced).
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])
        
        #Actual final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)
        
        #Sort new_active_LODA_scores from smallest to largest, whilst retaining the indices.
        #These may include labeled items from previous loops or from the initialization,
        #but since those were appended at the end, their indices are B, B+1, ...
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]
    
        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)
        
        #Following Das et al. (2016) we would send the single highest scoring data point
        #to the expert; slightly more generally we send the top_k highest scoring points
        #OF THE CURRENT BATCH (indices < B), highest score first.
        batch_ranked = sorted_indices[sorted_indices < B]
        top_k_indices = batch_ranked[::-1][:top_k].tolist()

        #Labels in Y_new associated with these indices:
        Y_expert = Y_new[top_k_indices]

        expert_provided_labels = list(Y_expert)
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1
        
        #If H_A only holds a proxy anomaly, drop it now. The flag is cleared for good as
        #soon as the expert has confirmed at least one real anomaly.
        if fake_anomaly == 1:
            H_A = np.empty((0, best_m))
            if np.any(Y_expert == 1):
                fake_anomaly = 0
                
        #Append the unweighted scores of the newly labeled points to H_A / H_N
        #(anything not labeled 1 is treated as a nominal):
        expert_rows = new_unweighted_scores[top_k_indices,:]
        expert_is_anomaly = (Y_expert == 1)
        H_A = np.concatenate([H_A,expert_rows[expert_is_anomaly]])
        H_N = np.concatenate([H_N,expert_rows[~expert_is_anomaly]])
                
        #We always have to retain the possibility that all of the labeled data so far
        #are nominals. In that case the data point whose score sits at the quantile
        #position is used as a proxy anomaly, and it is flagged as fake so that it is
        #removed again in the next loop instead of being kept as a real anomaly.
        if np.shape(H_A)[0]==0:
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            fake_anomaly = 1
    
        #The optimization uses the q_tau of the previous step. If none exists yet (no
        #usable initial data), fall back on the quantile of the current scores.
        if q_tau is None:
            q_tau = sorted_scores[my_quantile_sorted_index]
    
        #Update the linear combination coefficients:
        w_new = optimize_w_2(H_A, H_N, q_tau)
    
        #Update q_tau on this data:
        q_tau = sorted_scores[my_quantile_sorted_index]
        
        #Update w_old (normalized to unit L2 norm):
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
        
        #Diagnostics on the labeled data with the updated weights:
        temp_unweighted = np.concatenate([H_A,H_N])
        temp_pred = np.matmul(temp_unweighted,w_old)
        temp_YA = [1]*np.shape(H_A)[0]
        temp_YN = [0]*np.shape(H_N)[0]
        temp_Y = temp_YA + temp_YN
    
        #Update for the next loop:
        curr_L_index = curr_L_index + B
            
    ##########################################################################
    #GLAD
    #
    #A neural network (model_GLAD) outputs, for every data point, one weight per LODA
    #projector; the final score of a point is the weighted sum of its unweighted scores.
    #The network is first trained to output b everywhere and is then re-trained after
    #each batch on the labeled data. Writes all_GLAD_AUC[curr_trial, r] and
    #all_nY1_GLAD[curr_trial, r]. C_tau is the value set in the active-LODA section.
    
    #Initialize some parameters:
    b = 0.5
    mylambda = 1
    top_k = n_send
    #Sentinel meaning "no quantile computed yet":
    q_tau_tm1 = -10e7
    
    # Build the model:
    model_GLAD = build_neural_network(np.shape(X)[1], best_m)
    
    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])
    
    model_GLAD.summary()
    
    # Train the model
    # We use the same n_min used earlier to find the number of LODA projections. The
    # target is sized from the rows actually available, in case X has fewer than n_min.
    X_pretrain = X[:n_min,:]
    y_true = np.full((np.shape(X_pretrain)[0], best_m), b)
    
    model_GLAD.fit(X_pretrain, y_true, epochs=10, batch_size=32,verbose=0)
    
    # Sanity check: the output for each data point should all be very close to b:
    row = X[1:2] 
    output = model_GLAD.predict(row)
    
    #INITIALIZATION
    #Dealing with edge cases:
    if n_old == 0:
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,best_m])
        all_unweighted_scores = np.empty([0,best_m])
    
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i, y_known in enumerate(Y_old) if not np.isnan(y_known)]
    
        #The unweighted scores of the old data are needed whether or not any of it is
        #labeled: all_unweighted_scores must stay row-aligned with X_so_far below.
        all_unweighted_scores = np.empty([np.shape(X_old)[0],best_m])
    
        if curr_method == "RandomScore":
            all_unweighted_scores = new_unweighted_scores_fixed[:np.shape(X_old)[0],:]
        else:
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_old)
                #Convert the scores to float64 (assigning to .dtype would reinterpret
                #the raw bytes instead of converting the values):
                y_score = np.asarray(model.score_samples(X_old), dtype=np.float64)
                all_unweighted_scores[:,i] = y_score.squeeze()
    
        #Labeled subset (all three are empty when nothing is labeled):
        lab_idx = np.asarray(which_lab, dtype=int)
        X_lab = X_old[lab_idx,:]
        Y_lab = [Y_old[j] for j in which_lab]
        all_labeled_scores = all_unweighted_scores[lab_idx,:]
    
    #LOOPING OVER BATCHES:
    #Deal with indices of X:
    curr_L_index = n_old
    
    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops
    
    #Loop through batches of new data of size B:
    for r in range(n_loops): 
        
        curr_R_index = curr_L_index + B
        
        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:] 
        Y_new = Y[curr_L_index:curr_R_index].tolist()
        
        nYnew1 = Y_new.count(1)
    
        # Calculate all of the scores for X_new:
        all_scores = np.empty([np.shape(X_new)[0],best_m])
    
        if curr_method == "RandomScore":
            all_scores = new_unweighted_scores_fixed[curr_L_index:curr_R_index,:]
        else:
            for i, (name, model) in enumerate(models.items()):
                model.fit(X_new)
                y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
                all_scores[:,i] = y_score.squeeze()
    
        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])
    
        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        #Final score of each point = sum over projectors of (weight * unweighted score):
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)

        # Sort these scores and keep the top_k highest scoring points of the batch:
        sorted_indices = np.argsort(X_new_final_scores)
        sorted_scores = X_new_final_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])
    
        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])
    
        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
    
        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]
        
        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1
    
        #Scores on the external validation set with the current network (row-wise
        #weighted sum, same form as for the batch above):
        curr_w_ext = model_GLAD.predict(X_AUC)
        n_ext = np.shape(X_AUC)[0]
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores[:n_ext,:] * curr_w_ext[:n_ext,:], axis=1)
        
        curr_GLAD_AUC = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        GLAD_AUC[r] = curr_GLAD_AUC
        all_GLAD_AUC[curr_trial,r] = curr_GLAD_AUC
    
        fpr, tpr, _ = roc_curve(Y_AUC,X_new_final_scores_ext)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
        n_so_far = np.shape(X_so_far)[0]
    
        #Check if this is the first time through (no quantile available yet):
        if q_tau_tm1 == -10e7:
            #Dealing with q_tau_tm1: quantile of the final scores of all data so far,
            #with the network as it is BEFORE re-training on the labels.
            all_weights_so_far = model_GLAD.predict(X_so_far)
            all_final_scores_so_far = np.sum(all_unweighted_scores[:n_so_far,:] * all_weights_so_far, axis=1)

            # Calculate the quantile index without fully sorting
            quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
            partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
            q_tau_tm1 = partitioned_array[quantile_index]
    
        # Next, compile the model with the second custom loss function:
        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]
        
        # 2. Repeat the corresponding rows in X_lab and all_labeled_scores (4 extra copies for each anomaly)
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)  # Repeat rows of X_lab for anomalies
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)  # Repeat rows of all_labeled_scores for anomalies
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend Y_lab_temp with four 1s for each anomaly (matching the repeated rows)
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])
        
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])
    
        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)
    
        #Dealing with q_tau_tm1: recompute the quantile with the re-trained network, to
        #be used in the next loop.
        all_weights_so_far = model_GLAD.predict(X_so_far)
        all_final_scores_so_far = np.sum(all_unweighted_scores[:n_so_far,:] * all_weights_so_far, axis=1)

        # Calculate the quantile index without fully sorting
        quantile_index = int(np.floor(len(all_final_scores_so_far) * (1 - C_tau))) - 1 * (C_tau != 1)
        partitioned_array = np.partition(all_final_scores_so_far, quantile_index)
        q_tau_tm1 = partitioned_array[quantile_index]
        
    
        #Update indices:
        curr_L_index = curr_L_index + B

# Average every per-trial result matrix over the trials (axis 0), which leaves
# one value per batch for each method.
avg_LODA_RS3, avg_active_LODA_RS3, avg_GLAD_RS3 = (
    np.mean(per_trial_auc, axis=0)
    for per_trial_auc in (all_LODA_AUC, all_active_LODA_AUC, all_GLAD_AUC)
)

avg_nY1_LODA_RS3, avg_nY1_active_LODA_RS3, avg_nY1_GLAD_RS3 = (
    np.mean(per_trial_nY1, axis=0)
    for per_trial_nY1 in (all_nY1_LODA, all_nY1_active_LODA, all_nY1_GLAD)
)

# Save these averaged results, keyed by their variable names:
auc_results_RS3 = {
    "avg_LODA_RS3": avg_LODA_RS3,
    "avg_active_LODA_RS3": avg_active_LODA_RS3,
    "avg_GLAD_RS3": avg_GLAD_RS3,
    "avg_nY1_LODA_RS3": avg_nY1_LODA_RS3,
    "avg_nY1_active_LODA_RS3": avg_nY1_active_LODA_RS3,
    "avg_nY1_GLAD_RS3": avg_nY1_GLAD_RS3,
}
np.savez("AUC_RS3.npz", **auc_results_RS3)

# Save also the validation score vectors for later (for the plot below).
# These hold whatever the last executed loop iteration left in these variables.
score_results_RS3 = {
    "weighted_scores_RS3": weighted_scores,
    "weighted_validation_scores_RS3": weighted_validation_scores,
    "X_new_final_scores_ext_RS3": X_new_final_scores_ext,
}
np.savez("scores_RS3.npz", **score_results_RS3)

### Plot the AUCs

In [None]:
# Batch numbers on the x-axis. Active-LODA gets its own, shorter axis because
# it is only run for int(n_loops/2) batches.
xx = [*range(1,n_loops+1)]
xxactiveLODA = [*range(1,int(n_loops/2)+1)]

plt.figure(figsize=(10, 6))

# One curve per method: (x values, average AUC per batch, legend label, marker)
auc_curves = (
    (xx, avg_LODA_RS3, "LODA", 'o'),
    (xxactiveLODA, avg_active_LODA_RS3, "Active-LODA", 's'),
    (xx, avg_GLAD_RS3, "GLAD", '^'),
    (xx, avg_AAA_RS3, "AAA", 'd'),
)
for curve_x, curve_y, curve_label, curve_marker in auc_curves:
    plt.plot(curve_x, curve_y, label=curve_label, marker=curve_marker)

# Axis labels, with one tick per batch
plt.xlabel("Batch", fontsize=14)
plt.ylabel("Average AUC", fontsize=14)
plt.xticks(ticks=xx)

# Legend and grid for better readability
plt.legend(loc="best", fontsize=12)
plt.grid(True)

plt.show()

### Plot the cumulative anomalies detected

In [None]:
# Batch numbers on the x-axis. Active-LODA gets its own, shorter axis because
# it is only run for int(n_loops/2) batches.
xx = [*range(1,n_loops+1)]
xxactiveLODA = [*range(1,int(n_loops/2)+1)]

# One curve per method: (x values, average labeled-anomaly count, legend label, marker)
anomaly_curves = (
    (xx, avg_nY1_LODA_RS3, "LODA", 'o'),
    (xxactiveLODA, avg_nY1_active_LODA_RS3, "Active-LODA", 's'),
    (xx, avg_nY1_GLAD_RS3, "GLAD", '^'),
    (xx, avg_nY1_AAA_RS3, "AAA", 'd'),
)
for curve_x, curve_y, curve_label, curve_marker in anomaly_curves:
    plt.plot(curve_x, curve_y, label=curve_label, marker=curve_marker)

# Axis labels and title
plt.xlabel("Batch")
plt.ylabel("Anomalies detected")
plt.title("Cumulative anomalies detected")

# Legend, plus a light dashed grid for better readability
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

## Bring back all saved variables and plot them

In [None]:
# Reload the averaged results saved by the experiment cells. Each experiment
# (RS1, RS2, RS2_2p5, RS3) has one file for our method ("onlyAAA") and one for
# the other methods. Every .npz archive is opened in a `with` block so that its
# file handle is closed once the arrays have been read; the arrays themselves
# remain usable afterwards.

# Ours
with np.load("AUC_RS1_onlyAAA.npz") as data_RS1_onlyAAA:
    avg_AAA_RS1 = data_RS1_onlyAAA["avg_AAA_RS1"]
    avg_nY1_AAA_RS1 = data_RS1_onlyAAA["avg_nY1_AAA_RS1"]
# The others
with np.load("AUC_RS1.npz") as data_RS1:
    avg_LODA_RS1 = data_RS1["avg_LODA_RS1"]
    avg_active_LODA_RS1 = data_RS1["avg_active_LODA_RS1"]
    avg_GLAD_RS1 = data_RS1["avg_GLAD_RS1"]
    avg_nY1_LODA_RS1 = data_RS1["avg_nY1_LODA_RS1"]
    avg_nY1_active_LODA_RS1 = data_RS1["avg_nY1_active_LODA_RS1"]
    avg_nY1_GLAD_RS1 = data_RS1["avg_nY1_GLAD_RS1"]

# Ours
with np.load("AUC_RS2_onlyAAA.npz") as data_RS2_onlyAAA:
    avg_AAA_RS2 = data_RS2_onlyAAA["avg_AAA_RS2"]
    avg_nY1_AAA_RS2 = data_RS2_onlyAAA["avg_nY1_AAA_RS2"]
# The others
with np.load("AUC_RS2.npz") as data_RS2:
    avg_LODA_RS2 = data_RS2["avg_LODA_RS2"]
    avg_active_LODA_RS2 = data_RS2["avg_active_LODA_RS2"]
    avg_GLAD_RS2 = data_RS2["avg_GLAD_RS2"]
    avg_nY1_LODA_RS2 = data_RS2["avg_nY1_LODA_RS2"]
    avg_nY1_active_LODA_RS2 = data_RS2["avg_nY1_active_LODA_RS2"]
    avg_nY1_GLAD_RS2 = data_RS2["avg_nY1_GLAD_RS2"]

# Ours
with np.load("AUC_RS2_2p5_onlyAAA.npz") as data_RS2_2p5_onlyAAA:
    avg_AAA_RS2_2p5 = data_RS2_2p5_onlyAAA["avg_AAA_RS2_2p5"]
    avg_nY1_AAA_RS2_2p5 = data_RS2_2p5_onlyAAA["avg_nY1_AAA_RS2_2p5"]
# The others
with np.load("AUC_RS2_2p5.npz") as data_RS2_2p5:
    avg_LODA_RS2_2p5 = data_RS2_2p5["avg_LODA_RS2_2p5"]
    avg_active_LODA_RS2_2p5 = data_RS2_2p5["avg_active_LODA_RS2_2p5"]
    avg_GLAD_RS2_2p5 = data_RS2_2p5["avg_GLAD_RS2_2p5"]
    avg_nY1_LODA_RS2_2p5 = data_RS2_2p5["avg_nY1_LODA_RS2_2p5"]
    avg_nY1_active_LODA_RS2_2p5 = data_RS2_2p5["avg_nY1_active_LODA_RS2_2p5"]
    avg_nY1_GLAD_RS2_2p5 = data_RS2_2p5["avg_nY1_GLAD_RS2_2p5"]

# Ours
with np.load("AUC_RS3_onlyAAA.npz") as data_RS3_onlyAAA:
    avg_AAA_RS3 = data_RS3_onlyAAA["avg_AAA_RS3"]
    avg_nY1_AAA_RS3 = data_RS3_onlyAAA["avg_nY1_AAA_RS3"]
# The others
with np.load("AUC_RS3.npz") as data_RS3:
    avg_LODA_RS3 = data_RS3["avg_LODA_RS3"]
    avg_active_LODA_RS3 = data_RS3["avg_active_LODA_RS3"]
    avg_GLAD_RS3 = data_RS3["avg_GLAD_RS3"]
    avg_nY1_LODA_RS3 = data_RS3["avg_nY1_LODA_RS3"]
    avg_nY1_active_LODA_RS3 = data_RS3["avg_nY1_active_LODA_RS3"]
    avg_nY1_GLAD_RS3 = data_RS3["avg_nY1_GLAD_RS3"]

In [None]:
# Create the subplot grid (4 rows, 3 columns)
fig, axes = plt.subplots(4, 3, figsize=(12, 16))
#axes = axes.flatten()

################################################################################
## PLOTS
## First method: random-score models. The class-conditional densities of the
## first model's scores are drawn in panel axes[0, 0].

my_centre = 0.5
#Set tau back to 0 for each loop, since we don't want a mixture in the original data now:
tau = 0

# Specific arguments:
a_list = [[0.5,0.5]]
anomaly_cov_list = [ 
    [[0.1,0],[0,0.1]]
]
nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
L = len(nominal_mean)

# Sampling (the Y returned here is replaced further down by score-based labels)
X, Y = sample_data(
    multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
    n_old=n_old,                                           # Initial number of data points
    B=B,                                                   # Batch size
    n_loops=n_loops,                                       # Number of batches
    tau=tau,                                               # Fraction of anomalies
    a_list=a_list,                                         # Anomaly means
    anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
    nominal_mean=nominal_mean,                             # Nominal mean
    nominal_cov=nominal_cov,                               # Nominal covariance matrix
    L=L                                                    # Dimensionality
)


#RESET TAU TO ITS TRUE VALUE:
tau = 0.01

curr_method = "RandomScore"

# Build the models dictionary
models = build_random_score_models(M=n_models)
best_m = n_models

new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))
    
# Iterate through the models and get their scores
for i, (name, model) in enumerate(models.items()):
    model.fit(X)
    # Convert the scores to float64 (assigning to .dtype would reinterpret the
    # raw bytes instead of converting the values)
    y_score = np.asarray(model.score_samples(X), dtype=np.float64)
    new_unweighted_scores_fixed[:, i] = y_score.squeeze()

# (Re)Define the labels Y based on some feature of the data. Here a point is an
# anomaly (Y = 1) when its score from the first model lies in the closed interval
# [my_centre - tau, my_centre + tau], and a nominal (Y = 0) otherwise.
first_column_scores = new_unweighted_scores_fixed[:,0]

anom_interval_min = my_centre - (tau)
anom_interval_max = my_centre +  (tau)
Y = (
    (anom_interval_min <= first_column_scores)
    & (first_column_scores <= anom_interval_max)
).astype(np.float64)

# Separate the scores based on their labels
nominal_scores = first_column_scores[Y == 0]
anomaly_scores = first_column_scores[Y == 1]

# Calculate the number of nominal and anomaly points
num_nominal = len(nominal_scores)
num_anomaly = len(anomaly_scores)

# Create a range of values for plotting (column vector, as KernelDensity expects 2-D input)
x_range = np.linspace(first_column_scores.min(), first_column_scores.max(), 1000).reshape(-1, 1)

# Fit KDE for nominal and anomaly scores
kde_nominal = KernelDensity(kernel='gaussian', bandwidth=0.005).fit(nominal_scores.reshape(-1, 1))
kde_anomaly = KernelDensity(kernel='gaussian', bandwidth=0.005).fit(anomaly_scores.reshape(-1, 1))

# Compute densities for the range of x values (score_samples is exponentiated to get densities)
nominal_density = np.exp(kde_nominal.score_samples(x_range))
anomaly_density = np.exp(kde_anomaly.score_samples(x_range))

# Compute the mixture model (sum of the two densities weighted by the class proportions)
mixture_density = (num_nominal / len(first_column_scores)) * nominal_density + \
                  (num_anomaly / len(first_column_scores)) * anomaly_density

# Plot the densities
ax = axes[0, 0]

# Plot the Nominal distribution
ax.plot(x_range, nominal_density, label='Nominal Class (Y=0)', color='blue', linestyle='--')

# Plot the Anomaly distribution
ax.plot(x_range, anomaly_density, label='Anomaly Class (Y=1)', color='red', linestyle='--')

# Plot the Mixture model
#ax.plot(x_range, mixture_density, label='Mixture Model', color='black', linewidth=2)

# Adding labels and title
ax.set_xlabel('Score', fontsize=15)
ax.set_ylabel('Density', fontsize=15)
ax.set_title('Score densities', fontsize=17)
ax.grid(True)

# Add legend (custom handles, so the legend shows short class names)
legend_elements = [
    Line2D([0], [0], color='blue', lw=2, label="Nominals"),
    Line2D([0], [0], color='red', lw=2, label="Anomalies"),
]
ax.legend(handles=legend_elements, loc='upper right', fontsize=12)



################################################################################
## Second method: custom score models. The nominal and anomaly score densities
## given by nominal_params / anomaly_params are drawn in panel axes[1, 0].
nominal_params = (0, 1)
anomaly_params = (0, 0.05)
### WARNING: The true anomaly_params are (0, 0.01) but that makes for an ugly plot,
### so here the second parameter is set to 0.05 instead, for visual purposes.
### NOTE(review): the original note calls this parameter a variance, whereas
### norm.pdf below takes it as a scale (standard deviation) - confirm which is meant.

#Set tau back to 0 for each loop, since we don't want a mixture in the original data now:
tau = 0

# Specific arguments:
a_list = [[0.5,0.5]]
anomaly_cov_list = [ 
    [[0.1,0],[0,0.1]]
]
nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
L = len(nominal_mean)

# Sampling
X, Y = sample_data(
    multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
    n_old=n_old,                                           # Initial number of data points
    B=B,                                                   # Batch size
    n_loops=n_loops,                                       # Number of batches
    tau=tau,                                               # Fraction of anomalies
    a_list=a_list,                                         # Anomaly means
    anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
    nominal_mean=nominal_mean,                             # Nominal mean
    nominal_cov=nominal_cov,                               # Nominal covariance matrix
    L=L                                                    # Dimensionality
)

#RESET TAU TO ITS TRUE VALUE:
tau = 0.01

curr_method = "RandomScore"

best_m = n_models

# Build models
models = build_custom_score_models(M=n_models, tau=tau, nominal_params=nominal_params, anomaly_params=anomaly_params)

new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))

# Generate scores. A MixtureGaussianScoreModel returns the labels together with
# its scores, and those labels replace Y.
for i, (name, model) in enumerate(models.items()):
    model.fit(X)
    if isinstance(model, MixtureGaussianScoreModel):
        new_unweighted_scores_fixed[:, i], Y = model.score_samples(X)
    else:
        new_unweighted_scores_fixed[:, i] = model.score_samples(X)

ax = axes[1, 0]

# Visual verification of densities:
last_column_scores = new_unweighted_scores_fixed[:, -1]

# Separate the scores based on their labels
nominal_scores = last_column_scores[Y == 0]
anomaly_scores = last_column_scores[Y == 1]

# Number of points in each class
num_nominal = len(nominal_scores)
num_anomaly = len(anomaly_scores)

# Create a range of values for plotting
x_range = np.linspace(min(last_column_scores), max(last_column_scores), 1000)

# Create the two distributions (Nominal and Anomaly)
nominal_dist = norm.pdf(x_range, nominal_params[0], nominal_params[1])
anomaly_dist = norm.pdf(x_range, anomaly_params[0], anomaly_params[1])

# Compute the mixture model (weighted sum of the two distributions)
mixture_dist = (1 - tau) * nominal_dist + tau * anomaly_dist


# Plot the Nominal distribution
ax.plot(x_range, nominal_dist, label='Nominal Class (Y=0)', color='blue', linestyle='--')

# Plot the Anomaly distribution
ax.plot(x_range, anomaly_dist, label='Anomaly Class (Y=1)', color='red', linestyle='--')

# Plot the Mixture model
#ax.plot(x_range, mixture_dist, label='Mixture Model', color='black', linewidth=2)

# Adding labels and title
ax.set_xlabel('Score', fontsize=15)
ax.set_ylabel('Density', fontsize=15)
ax.set_title('Score densities', fontsize=17)
ax.grid(True)


# Add legend (custom handles, so the legend shows short class names). This is
# the only legend call needed: a second call on the same axes replaces the first.
legend_elements = [
    Line2D([0], [0], color='blue', lw=2, label="Nominals"),
    Line2D([0], [0], color='red', lw=2, label="Anomalies")
]
ax.legend(handles=legend_elements, loc='upper right', fontsize=12)

#####################################################################################################
## Third method: same set-up as the second method, but with the anomaly score
## distribution centred at 2.5. This part samples the data and builds the score
## matrix; the plotting follows.

nominal_params = (0, 1)
anomaly_params = (2.5, 0.05)
### WARNING (as written for the second method): the true anomaly_params are (0, 0.01),
### but that makes for an ugly plot, so 0.05 is used instead for visual purposes.
### NOTE(review): this warning still cites a mean of 0 although the mean used
### here is 2.5 - confirm the true parameters for this method.

# Sample with tau = 0, since we don't want a mixture in the original data now.
tau = 0

# Specific arguments:
a_list = [[0.5, 0.5]]
anomaly_cov_list = [[[0.1, 0], [0, 0.1]]]
nominal_mean = np.array([0, 0])            # Mean of the nominal Gaussian distribution
nominal_cov = np.array([[1, 0], [0, 1]])   # Covariance of the nominal Gaussian distribution
L = len(nominal_mean)                      # Dimensionality

# Sampling
sampling_kwargs = dict(
    n_old=n_old,                        # Initial number of data points
    B=B,                                # Batch size
    n_loops=n_loops,                    # Number of batches
    tau=tau,                            # Fraction of anomalies
    a_list=a_list,                      # Anomaly means
    anomaly_cov_list=anomaly_cov_list,  # Anomaly covariance matrices
    nominal_mean=nominal_mean,          # Nominal mean
    nominal_cov=nominal_cov,            # Nominal covariance matrix
    L=L,                                # Dimensionality
)
X, Y = sample_data(multivariate_gaussian_sampling_with_anomaly_gaussians, **sampling_kwargs)

# Reset tau to its true value:
tau = 0.01

curr_method = "RandomScore"

best_m = n_models

# Build models
models = build_custom_score_models(
    M=n_models,
    tau=tau,
    nominal_params=nominal_params,
    anomaly_params=anomaly_params,
)

new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))

# Generate scores, one column per model. A MixtureGaussianScoreModel returns the
# labels together with its scores, and those labels replace Y.
for i, (name, model) in enumerate(models.items()):
    model.fit(X)
    model_output = model.score_samples(X)
    if isinstance(model, MixtureGaussianScoreModel):
        new_unweighted_scores_fixed[:, i], Y = model_output
    else:
        new_unweighted_scores_fixed[:, i] = model_output



ax = axes[2, 0]

# Visual verification of densities:
last_column_scores = new_unweighted_scores_fixed[:, -1]

# Separate the scores based on their labels
nominal_scores = last_column_scores[Y == 0]
anomaly_scores = last_column_scores[Y == 1]

# Number of points in each class
num_nominal = len(nominal_scores)
num_anomaly = len(anomaly_scores)

# Create a range of values for plotting
x_range = np.linspace(min(last_column_scores), max(last_column_scores), 1000)

# Create the two distributions (Nominal and Anomaly)
nominal_dist = norm.pdf(x_range, nominal_params[0], nominal_params[1])
anomaly_dist = norm.pdf(x_range, anomaly_params[0], anomaly_params[1])

# Compute the mixture model (weighted sum of the two distributions)
mixture_dist = (1 - tau) * nominal_dist + tau * anomaly_dist


# Plot the Nominal distribution
ax.plot(x_range, nominal_dist, label='Nominal Class (Y=0)', color='blue', linestyle='--')

# Plot the Anomaly distribution
ax.plot(x_range, anomaly_dist, label='Anomaly Class (Y=1)', color='red', linestyle='--')

# Plot the Mixture model
#ax.plot(x_range, mixture_dist, label='Mixture Model', color='black', linewidth=2)

# Adding labels and title
ax.set_xlabel('Score', fontsize=15)
ax.set_ylabel('Density', fontsize=15)
ax.set_title('Score densities', fontsize=17)
ax.legend(loc="best")
ax.grid(True)


# Add legend
legend_elements = [
    Line2D([0], [0], color='blue', lw=2, label="Nominals"),
    Line2D([0], [0], color='red', lw=2, label="Anomalies")
]
ax.legend(handles=legend_elements, loc='center left', fontsize=12)



#####################################################################################################
## Fourth method

# (loc, scale) pairs handed to the score models and to norm.pdf below.
nominal_params = (0, 1)
anomaly_params_list = [(-2.5, 0.05), (2.5, 0.05)]  # Two anomaly distributions
### WARNING: the true anomaly distributions have variance 0.01, but that makes for an
### unreadable plot, so the value 0.05 is used here instead, for visual purposes only.
### NOTE(review): the second entry of each pair is passed to norm.pdf as its `scale`
### argument (a standard deviation) -- confirm whether "variance" is meant literally.


# Sample with tau = 0, since we don't want a mixture in the original data:
tau = 0

# Specific arguments:
a_list = [[0.5,0.5]]
anomaly_cov_list = [ 
    [[0.1,0],[0,0.1]]
]
nominal_mean = np.array([0,0])    # Mean of the nominal Gaussian distribution
nominal_cov = np.array([[1,0],[0,1]])   # Covariance of the nominal Gaussian distribution
L = len(nominal_mean)

# Sampling
X, Y = sample_data(
    multivariate_gaussian_sampling_with_anomaly_gaussians, # Sampling scheme
    n_old=n_old,                                           # Initial number of data points
    B=B,                                                   # Batch size
    n_loops=n_loops,                                       # Number of batches
    tau=tau,                                               # Fraction of anomalies
    a_list=a_list,                                         # Anomaly means
    anomaly_cov_list=anomaly_cov_list,                     # Anomaly covariance matrices
    nominal_mean=nominal_mean,                             # Nominal mean
    nominal_cov=nominal_cov,                               # Nominal covariance matrix
    L=L                                                    # Dimensionality
)

# Reset tau to its true value:
tau = 0.01

curr_method = "RandomScore"

best_m = n_models


# Build models
models = build_custom_score_models2(
    M=n_models, 
    tau=tau, 
    nominal_params=nominal_params, 
    anomaly_params_list=anomaly_params_list
)

# One column of scores per model.
new_unweighted_scores_fixed = np.zeros((X.shape[0], len(models)))

# Fit every model on X and store its scores. An ExtendedMixtureGaussianScoreModel
# returns a (scores, labels) pair, and its labels overwrite Y.
for i, (name, model) in enumerate(models.items()):
    model.fit(X)
    if isinstance(model, ExtendedMixtureGaussianScoreModel):
        new_unweighted_scores_fixed[:, i], Y = model.score_samples(X)
    else:
        new_unweighted_scores_fixed[:, i] = model.score_samples(X)



# Left-hand panel of the fourth row.
ax = axes[3, 0]

last_column_scores = new_unweighted_scores_fixed[:, -1]

# Separate the scores based on their labels
nominal_scores = last_column_scores[Y == 0]
anomaly_scores = last_column_scores[Y == 1]

# Number of points in each class
num_nominal = len(nominal_scores)
num_anomaly = len(anomaly_scores)

# Create a range of values for plotting
x_range = np.linspace(min(last_column_scores), max(last_column_scores), 1000)

# Create the nominal distribution
nominal_dist = norm.pdf(x_range, nominal_params[0], nominal_params[1])

# Create the two anomaly distributions
anomaly_dist_1 = norm.pdf(x_range, anomaly_params_list[0][0], anomaly_params_list[0][1])
anomaly_dist_2 = norm.pdf(x_range, anomaly_params_list[1][0], anomaly_params_list[1][1])

# Mixture density: the anomaly mass tau is split equally between the two anomaly
# components (computed but not drawn; see the disabled plot call below).
mixture_dist = (
    (1 - tau) * nominal_dist +
    (tau / 2) * anomaly_dist_1 +
    (tau / 2) * anomaly_dist_2
)


# Plot the Nominal distribution
ax.plot(x_range, nominal_dist, label='Nominal Class (Y=0)', color='blue', linestyle='--')

# Plot the Anomaly distributions
ax.plot(x_range, anomaly_dist_1, label='Anomaly Class 1 (Y=1)', color='red', linestyle='--')
ax.plot(x_range, anomaly_dist_2, label='Anomaly Class 2 (Y=1)', color='green', linestyle='--')

# Plot the Mixture model
#ax.plot(x_range, mixture_dist, label='Mixture Model', color='black', linewidth=2)

# Adding labels and title
ax.set_xlabel('Score', fontsize=15)
ax.set_ylabel('Density', fontsize=15)
ax.set_title('Score densities', fontsize=17)
# Use the Axes method rather than plt.grid: the pyplot function acts on the
# *current* axes, which need not be this panel in a multi-panel figure.
ax.grid(True)


# Legend with solid proxy lines, attached explicitly to this panel.
legend_elements = [
    Line2D([0], [0], color='blue', lw=2, label="Nominals"),
    Line2D([0], [0], color='red', lw=2, label="Anomalies class 1"),
    Line2D([0], [0], color='green', lw=2, label="Anomalies class 2")
]
ax.legend(handles=legend_elements, loc='upper right', fontsize=12)


######################################################################################################
#### SECOND COLUMN ###################################################################################
######################################################################################################
# Middle column of the figure: average AUC per batch, one row per scenario.

# Batch numbers for the x-axis. The Active-LODA curves are plotted against the
# shorter axis of int(n_loops/2) batches.
xx = [*range(1, n_loops + 1)]
xxactiveLODA = [*range(1, int(n_loops/2) + 1)]


# One tuple of (LODA, Active-LODA, GLAD, AAA) average-AUC arrays per figure row.
for i, (avg_LODA_plot, avg_active_LODA_plot, avg_GLAD_plot, avg_AAA_plot) in enumerate((
    (avg_LODA_RS1, avg_active_LODA_RS1, avg_GLAD_RS1, avg_AAA_RS1),
    (avg_LODA_RS2, avg_active_LODA_RS2, avg_GLAD_RS2, avg_AAA_RS2),
    (avg_LODA_RS2_2p5, avg_active_LODA_RS2_2p5, avg_GLAD_RS2_2p5, avg_AAA_RS2_2p5),
    (avg_LODA_RS3, avg_active_LODA_RS3, avg_GLAD_RS3, avg_AAA_RS3),
)):
    # Panel in column index 1 of the current row.
    ax = axes[i, 1]

    # Draw the four methods in a fixed order so the colour cycle is stable.
    for batches, curve, method in (
        (xx, avg_LODA_plot, "LODA"),
        (xxactiveLODA, avg_active_LODA_plot, "Active-LODA"),
        (xx, avg_GLAD_plot, "GLAD"),
        (xx, avg_AAA_plot, "AAA"),
    ):
        ax.plot(batches, curve, label=method, linewidth=1.5)

    # Title, axis labels, grid and legend.
    ax.set_title("Average AUC over time", fontsize=17)
    ax.set_xlabel("Batch", fontsize=15)
    ax.set_ylabel("Average AUC", fontsize=15)
    #ax.set_xticks(xx)
    #ax.set_xticks(range(5, n_loops + 1, 5))
    ax.grid(True)
    ax.legend(loc="best", fontsize=13)

###################################
## THIRD COLUMN ###################
###################################

# Right-hand column: cumulative number of detected anomalies per batch (RS* data),
# one row per scenario.
for i, (avg_nY1_LODA_plot, avg_nY1_active_LODA_plot, avg_nY1_GLAD_plot, avg_nY1_AAA_plot) in enumerate((
    (avg_nY1_LODA_RS1, avg_nY1_active_LODA_RS1, avg_nY1_GLAD_RS1, avg_nY1_AAA_RS1),
    (avg_nY1_LODA_RS2, avg_nY1_active_LODA_RS2, avg_nY1_GLAD_RS2, avg_nY1_AAA_RS2),
    (avg_nY1_LODA_RS2_2p5, avg_nY1_active_LODA_RS2_2p5, avg_nY1_GLAD_RS2_2p5, avg_nY1_AAA_RS2_2p5),
    (avg_nY1_LODA_RS3, avg_nY1_active_LODA_RS3, avg_nY1_GLAD_RS3, avg_nY1_AAA_RS3),
)):
    ax = axes[i, 2]

    # (x values, y values, legend label, line style) for each method, in draw order.
    for batches, curve, method, dash in (
        (xx, avg_nY1_LODA_plot, "LODA", '-'),
        (xxactiveLODA, avg_nY1_active_LODA_plot, "Active-LODA", ':'),
        (xx, avg_nY1_GLAD_plot, "GLAD", '-.'),
        (xx, avg_nY1_AAA_plot, "AAA", '-'),
    ):
        ax.plot(batches, curve, label=method, linewidth=1.5, linestyle=dash)

    ax.set_title("Cumul. anomalies detected", fontsize=17)
    ax.set_xlabel("Batch", fontsize=15)
    ax.set_ylabel("Anomalies detected", fontsize=15)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(fontsize=13)

# Tighten the spacing between panels.
plt.tight_layout()

# Write the figure to disk as a PDF.
fig.savefig("Score_Scenarios2.pdf", format="pdf", bbox_inches="tight")

# Show the figure.
plt.show()

# Time series data trials

Here we generate time series data and then perform anomaly detection on it.

In [None]:
# Value passed as `my_tau` to generate_time_series in the trial cells of this
# section. Change it here if a different value is needed; note that the
# parameter cell that follows assigns my_tau again.
my_tau = 0.01

In [None]:
## PARAMETERS ##
# Global settings shared by the time-series trial cells below.

# Batch size: number of rows of X consumed per loop, and the slice width used
# when scoring the external validation set.
B = 500

# Number of batches (loops) processed in each trial:
n_loops=200

# Number of independent trials; results are averaged over trials.
n_trials = 5

# Fraction (not a percentage) of the initial data whose labels stay visible;
# the trial cells hide ceil((1-u)*n_old) labels.
u = 0.2

# Make the initial ("old") data the same size as one batch:
n_old = B

# Value passed as `my_tau` to generate_time_series (also used for n_neighbors below):
my_tau = 0.01

# Number of dimensions (passed as `d` to generate_time_series and to Create_Anomaly_Models):
my_d = 10
# Total number of data points per trial: the initial data plus n_loops batches.
n = n_old + B*n_loops
# Number of validation points (must be a multiple of B, since the validation
# set is scored in n_loops consecutive slices of size B):
n_AUC = B*n_loops
# Number of nearest neighbors intended for the LOF method.
# NOTE(review): the trial cells visible in this section construct
# LocalOutlierFactor(novelty=True) without passing n_neighbors -- confirm
# whether this value is used anywhere.
n_neighbors = int(np.ceil(B*my_tau))
# Constant vectors of length my_d and a scalar, all forwarded unchanged to
# generate_time_series (their exact role is defined in that function).
mean_G1=np.full(my_d, 5) 
mean_G2=np.full(my_d, 5.5) 
mean_G3=np.full(my_d, 6)
c_anom=0.01
# Number of LODA models requested from Create_Anomaly_Models:
n_LODA_models = 5

### Only our method

In [None]:
# Seed NumPy's global random generator before running the trials (the label
# masking in the next cell draws from it via np.random.permutation).
np.random.seed(123456789)

In [None]:
# Time-series trials for the AAA method only.
#
# For each trial: generate a data stream and an external validation stream,
# mask most labels of the initial data, build the pool of anomaly detectors,
# then feed the stream batch by batch to ActiveAGG. After every batch we record
# the number of labeled anomalies found so far and the AUC of the learned model
# on the validation set. Trial averages are written to .npz files at the end.

# Per-trial, per-batch results.
all_AAA_AUC = np.zeros((n_trials,n_loops))
all_nY1_AAA = np.zeros((n_trials, n_loops))  # Cumulative number of labeled anomalies

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials)

    # Generate the data stream and a separate validation stream.
    X, Y = generate_time_series(my_tau=my_tau, d=my_d, n = n,mean_G1=mean_G1, mean_G2=mean_G2, mean_G3=mean_G3,c_anom = c_anom)
    X_AUC, Y_AUC = generate_time_series(my_tau=my_tau, d=my_d, n = n_AUC,mean_G1=mean_G1, mean_G2=mean_G2, mean_G3=mean_G3,c_anom = c_anom)

    # Masking the old data, if it exists:
    if n_old == 0:
        Y_muted = np.empty((0,)).astype(float)

    # Need at least 2 anomalies in the old data for these simulations to function.
    # NOTE: with n_old == 0 this check always raises, so the n_old == 0 branches
    # in this cell are never exercised.
    if np.sum(Y[:n_old] == 1) < 2:
        raise ValueError("There are not at least two true anomalies in the old data.")

    if n_old > 0:
        # These quantities do not depend on the random mask, so compute them once.
        n_old_anomalies = np.sum(Y[:n_old] == 1)
        n_old_nominals = n_old - n_old_anomalies
        # How many labels to hide (if u > 0 at least one label stays visible
        # thanks to how the count is rounded up):
        n_hide = int(np.ceil((1-u)*n_old))
        n_hide_pc = (n_hide/n_old)*100

        # Redraw the random mask until 2 labeled anomalies (or 98 labeled
        # nominals) remain.
        # NOTE(review): the targets 2 / 98 are hard-coded; with n_old = 500 and
        # u = 0.2 exactly 100 labels stay visible, so both conditions coincide.
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) and (n_remaining_nominals != 98):
            # Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of the permutation are the points to hide:
            hide_indices = permute_indices[0:n_hide]
            # Fill in Y_muted (NaN marks a hidden label):
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')

        # Rows of X corresponding to the anomalies whose labels stayed visible
        # in the accepted mask:
        X_with_anomalies = [X[i,:] for i in range(n_old) if Y_muted[i] == 1]

    # Define additional models to include
    extra_models = {
        "IsolationForest": IsolationForest(),
        "OneClassSVM": OneClassSVM(),
        "EuclideanDifferenceAnomalyDetector": EuclideanDifferenceAnomalyDetector(),
        "LocalOutlierFactor": LocalOutlierFactor(novelty=True),
        "RandomScore": RandomScore(),
    }

    # Create all models (including the LODA models and additional ones)
    models = Create_Anomaly_Models(my_d, n_LODA_models=n_LODA_models, additional_models=extra_models)

    # Calculate the unweighted scores on the external validation set. Since the
    # score functions are contextual per batch, the validation set is also
    # scored in consecutive slices of size B.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],len(models)))
    for j in range(n_loops):
        # left and right indices:
        inlef = B*j
        inrig = B*(j+1)
        new_unweighted_validation_scores[inlef:inrig,:] = Compute_Model_Scores(X_AUC[inlef:inrig,:],models)

    # Column-wise min-max rescaling of the validation scores, as needed by
    # active-LODA. It is not used further in this cell; kept so the variable
    # is defined exactly as before.
    new_unweighted_validation_scores_AL = (new_unweighted_validation_scores - new_unweighted_validation_scores.min(axis=0)) / (new_unweighted_validation_scores.max(axis=0) - new_unweighted_validation_scores.min(axis=0)) + 0.0000000001

    #####################################################################################################################
    # Our trials:

    # Choose a supervised method that will be applied:
    supervised_method = 'RandomForestClassifier'

    # Initialization, dealing with the no-initial-data edge case:
    if n_old == 0:
        X_old = None
        Y_old = None
    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted.tolist()  # We directly use Y_muted, as in all the other methods

    # Run the initialization function InitActiveAGG:
    X_lab, Y_lab, all_labeled_scores = InitActiveAGG(X_old = X_old,Y_old = Y_old,n_data_min = 100, models=models)

    # Left index of the next batch within X:
    curr_L_index = n_old

    # Storage of the AUC scores of this trial:
    AAA_AUC = [0]*n_loops

    # Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        # Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        nYnew1 = Y_new.count(1)

        # Learn from labeled data, propose new predicted anomalies, and propose other data to label:
        X_old, X_lab, all_labeled_scores, indices_to_expert, learned_model, supervised_indices = ActiveAGG(
            X_new = X_new,
            X_old = X_old,
            X_lab = X_lab,
            Y_lab = Y_lab,
            all_labeled_scores = all_labeled_scores,
            models=models,
            supervised_method = supervised_method,
            n_data_min = 100,
            n_data_max = B,
            min_n_labeled = 5,
            n_send=n_send,
            pc_top = 0.6,
            min_n_nom=5,
            min_n_anom=1,
            tau_exp=0.001,
        )
        # Pretend to be the expert and add the true labels to the proposed data:
        expert_provided_labels = [Y_new[j] for j in indices_to_expert]
        Y_lab = Y_lab + expert_provided_labels

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_AAA[curr_trial, r] = nY1

        # Test the current learned model on the external data in order to
        # calculate the AUC. Without a learned model, fall back to 0.5.
        if learned_model is not None:
            new_preds = learned_model.predict_proba(new_unweighted_validation_scores)[:,1]
            curr_auc = roc_auc_score(Y_AUC,new_preds)
        else:
            curr_auc = 0.5
        AAA_AUC[r] = curr_auc
        all_AAA_AUC[curr_trial,r] = curr_auc

        # Update indices:
        curr_L_index = curr_R_index

# Calculate column averages (over trials) for each array
avg_AAA_TS1 = np.mean(all_AAA_AUC, axis=0)
avg_nY1_AAA_TS1 = np.mean(all_nY1_AAA, axis=0)

# Save these results:
np.savez("AUC_TS1_onlyAAA.npz",
         avg_AAA_TS1 = avg_AAA_TS1,
         avg_nY1_AAA_TS1 = avg_nY1_AAA_TS1,
        )

# Save also the most recent validation predictions for later (for the plot below).
# NOTE: new_preds is only assigned in batches where a model was learned.
np.savez("scores_TS1_onlyAAA.npz",
         new_preds_TS1 = new_preds
        )

### The other three methods

In [None]:
# Seed NumPy's global random generator before running the trials of the other
# three methods (the label masking in the next cell draws from it via
# np.random.permutation).
np.random.seed(123456789)

In [None]:
all_LODA_AUC = np.zeros((n_trials,n_loops))
all_active_LODA_AUC = np.zeros((n_trials,int(n_loops/4)))
all_GLAD_AUC = np.zeros((n_trials,n_loops))

all_nY1_LODA = np.zeros((n_trials, n_loops))  # Array to store nY1 values
all_nY1_active_LODA = np.zeros((n_trials, int(n_loops/4)))  # Array to store nY1 values
all_nY1_GLAD = np.zeros((n_trials, n_loops))  # Array to store nY1 values

for curr_trial in range(n_trials):
    print('We are in trial',curr_trial+1,'out of',n_trials) 
    
    # Generate the data
    X, Y = generate_time_series(my_tau=my_tau, d=my_d, n = n,mean_G1=mean_G1, mean_G2=mean_G2, mean_G3=mean_G3,c_anom = c_anom)
    X_AUC, Y_AUC = generate_time_series(my_tau=my_tau, d=my_d, n = n_AUC,mean_G1=mean_G1, mean_G2=mean_G2, mean_G3=mean_G3,c_anom = c_anom)   
    
    # Masking the old data, if it exists:
    if n_old == 0:
        #print('There is no initial data.')
        Y_muted = np.empty((0,)).astype(float)

    #Need at least 2 anomalies in the old data for these simulations to function
    if np.sum(Y[:n_old] == 1) < 2:
        raise ValueError("There are not at least two true anomalies in the old data.")
    
    
    if n_old > 0:
        n_remaining_anomalies = 0
        n_remaining_nominals = 100
        while (n_remaining_anomalies != 2) & (n_remaining_nominals != 98):
        #while myplay==0:
            n_old_anomalies = np.sum(Y[:n_old] == 1)
            n_old_nominals = n_old - n_old_anomalies
            #Calculate how many labels to show and how many to hide:
            n_hide = int(np.ceil((1-u)*n_old)) # if u > 0 then at least one label will be shown due to ceiling function
            n_hide_pc = (n_hide/n_old)*100
            #Randomly select n_hide of the n_old data points to mask:
            permute_indices = np.random.permutation(n_old)
            # The first n_hide entries of permute_indices are the data points whose labels get hidden:
            hide_indices = permute_indices[0:n_hide]
            #Fill in Y_muted:
            Y_muted = Y[:n_old].astype(float)
            Y_muted[hide_indices] = np.nan
            n_remaining_anomalies = np.sum(Y_muted == 1)
            n_remaining_nominals = np.sum(Y_muted == 0)
            print('There were',n_old_anomalies,'anomalies and',n_old_nominals,'nominals in the initial data. After randomly masking',n_hide_pc,'% of the initial data, there remain',n_remaining_anomalies,'labeled anomalies and',n_remaining_nominals,'labeled nominals.')
            #Get the values of X corresponding to the unmuted anomalies:
            X_with_anomalies = []
            for i in range(n_old):
                if Y_muted[i] == 1:
                    X_with_anomalies = X_with_anomalies + [X[i,:]]


    # Define additional models to include
    extra_models = {
        "IsolationForest": IsolationForest(),
        "OneClassSVM": OneClassSVM(),
        "EuclideanDifferenceAnomalyDetector": EuclideanDifferenceAnomalyDetector(),
        "LocalOutlierFactor": LocalOutlierFactor(novelty=True),
        "RandomScore": RandomScore(),
    }
    
    # Create all models (including the LODA models and additional ones)
    models = Create_Anomaly_Models(my_d, n_LODA_models=n_LODA_models, additional_models=extra_models)
    
    #Calculate the unweighted scores on the massive external validation set. Here, to be more precise,
    #since we now have score functions that are contextual per batch, it makes sense to calculate raw
    #scores on the external validation set per batch of size B too.
    new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],len(models)))
    for j in range(n_loops):
        #left and right indices:
        inlef = B*j 
        inrig = B*(j+1)
        new_unweighted_validation_scores[inlef:inrig,:] = Compute_Model_Scores(X_AUC[inlef:inrig,:],models)

    # Since active LODA has convergence issues with the raw scores, we have to rescale the above just for
    # active-LODA
    new_unweighted_validation_scores_AL = (new_unweighted_validation_scores - new_unweighted_validation_scores.min(axis=0)) / (new_unweighted_validation_scores.max(axis=0) - new_unweighted_validation_scores.min(axis=0)) + 0.0000000001

    #####################################################################################################################
    # LODA TRIALS
    
    curr_L_index = n_old
    LODA_AUC = [0]*n_loops
        
    weighted_scores=np.mean(new_unweighted_validation_scores,axis=1)

    which_lab = [i for i in range(len(Y_muted)) if not np.isnan(Y_muted[i])]
    Y_lab = [Y_muted[j] for j in which_lab]
    
    for r in range(n_loops):
        
        curr_R_index = curr_L_index + B
        
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        
        curr_L_index = curr_L_index + B
        
        new_unweighted_scores = np.empty((B,len(models)))
        
        for i, (name, model) in enumerate(models.items()):
            
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()
            
        #Final LODA scores are averages over anomaly detectors
        new_LODA_scores = np.mean(new_unweighted_scores,axis=1)

        #############################################################
        # Sort these scores:
        top_k = n_send
        sorted_indices = np.argsort(new_LODA_scores)
        sorted_scores = new_LODA_scores[sorted_indices]
        top_k_indices = sorted_indices[(B-top_k):]
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels
        nY1 = Y_lab.count(1)
        all_nY1_LODA[curr_trial,r] = nY1
        #############################################################

        
        #############################################################
        LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_scores)
        all_LODA_AUC[curr_trial,r] = roc_auc_score(Y_AUC,weighted_scores)
        
        fpr, tpr, _ = roc_curve(Y_AUC,weighted_scores)
        # Calculate AUC
        roc_auc = auc(fpr, tpr)
    
    #####################################################################################################################
    # ACTIVE-LODA trials:
    
    #We do however have to provide C_tau since it needs to be used before the optimization
    #function. C_tau = 0.03 is the default suggested in Das et al. (2016).
    C_tau = tau
    
    #There are hyperparameters that need to be set in advance for this algorithm. However, 
    #for simplicity we assume they take the default values in the function optimize_w.
    #C_A = 100  #default in their article
    #C_eta = 1000. #default in their article   
    
    #In active LODA, the whole budget of n_send items per loop is dedicated to sending the 
    #items with highest predicted scores:
    top_k = n_send
    
    #So now that we have the LODA projectors (i.e., a set of best_m anomaly detectors),
    #we can begin.
    
    #We shall initialize the vector of weights as being equal and summing to 1:
    w_old = np.array([1/len(models) for i in range(len(models))])
    
    #We also initialize arrays to put the unweighted scores of labeled data into:
    H_A = np.empty((0, len(models)))
    H_N = np.empty((0, len(models)))
    
    #We also initialize a fake anomaly alert to 0 (see below). This means basically that
    #we have not so far had to add a "fake anomaly" to the optimization due to there only
    #being labeled nominals so far.
    fake_anomaly = 0
    
    #Unlike basic LODA, here anomaly and nominal labels MATTER. In particular, at the
    #beginning, it matters whether there is initial "old" data, and if so, whether some
    #or all of it is already labeled. If n_old > 0, then we have already calculated Y_muted
    #earlier in this script (for some fixed percentage u of this "old" data for which we
    #suppose we know its true label)
    
    #If there were initial data and at least one labeled nominal (following Das et al. (2016))
    if n_old > 0 and np.sum(Y_muted == 0) > 0:
            
        #Extract the initial data from X:
        X_new = X[:n_old,:]
    
        #Calculate the unweighted scores for each LODA projector on the initial data: 
        new_unweighted_scores = np.empty((n_old,len(models)))
    
        for i, (name, model) in enumerate(models.items()):
    
            model.fit(X_new)
            y_score = model.score_samples(X_new)
            y_score.dtype = np.float64
            new_unweighted_scores[:,i] = y_score.squeeze()

        # Active LODA has convergence problems as soon as we are no longer using LODA projections which are 
        # similarly scaled. Thus, here, we rescale scores to [0,1]
        new_unweighted_scores = (new_unweighted_scores - new_unweighted_scores.min(axis=0)) / (new_unweighted_scores.max(axis=0) - new_unweighted_scores.min(axis=0)) + 0.0000000001
    
        #Calculate the sum of the linear combination of these scores weighted by w_old:
        new_scores = np.matmul(new_unweighted_scores,w_old)
    
        #Sort new_scores from smallest to largest, whilst retaining the indices.
        sorted_indices = np.argsort(new_scores)
        sorted_scores = new_scores[sorted_indices]
    
        #Calculate q_tau on this initial data: 
        #WARNING: one of the underlying problems with active-LODA is that it basically expects
        #anomalies to have the highest scores from the get go. But here, in this first loop,
        #it very well could be that the anomalies have all the LOWEST scores. The calculation
        #of q_tau at this point is therefore a bit weird/pointless. Also, active-LODA basically
        #expects positive weights, especially in its minimization step, with its L2 norm penalty
        #on the weights. This means that active-LODA does not reach its true potential, as
        #defined and coded by Das et al. (2016). The trouble is that without a penalty on 
        #making the weights not too big, not too small, and not necessarily positive, it will
        #remain suboptimal. 
        my_quantile_sorted_index = int(np.floor(n_old*(1-C_tau)))-1*(C_tau != 1)
        q_tau = sorted_scores[my_quantile_sorted_index]
    
        #Create the arrays H_A and H_N:
        #First we take H_A. In Das et al. (2016) they allow for the case that there
        #are no labeled anomalies, only labeled nominals.
        if np.sum(Y_muted == 1) == 0:
            #Set a "fake anomaly alert to 1":
            fake_anomaly = 1
            #We take the data point corresponding to the score sorted into the 
            #np.floor(n_old*(1-C_tau))-th position as a proxy for an anomaly.
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)
            H_N = new_unweighted_scores[(Y_muted==0),:]
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
                    
        else:
            #There is at least one labeled anomaly:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            H_N = new_unweighted_scores[(Y_muted==0),:]
           
            #Update the linear combination coefficients:
            w_new = optimize_w_2(H_A, H_N, q_tau)
            
            temp_unweighted = np.concatenate([H_A,H_N])
            temp_pred = np.matmul(temp_unweighted,w_new)
            temp_YA = [1]*np.shape(H_A)[0]
            temp_YN = [0]*np.shape(H_N)[0]
            temp_Y = temp_YA + temp_YN
            
        #We can now update w_old with the value of w_new. If this whole big loop wasn't run,
        #then w_old will stay at its original value.
        w_old = w_new
        w_old = w_old / np.linalg.norm(w_old)
    
    #Else if there were initial data but no labeled nominals: Das et al. (2016)
    #provide no details for what to do here. It is possible though unlikely that
    #in the initial data, there were no labeled nominals but there was at least
    #one labeled anomaly. We do have to check this, since we will need to initialize
    #H_A in this case, even if H_N is empty.
    elif n_old > 0 and np.sum(Y_muted == 0) == 0:
        if np.sum(Y_muted == 1) > 0:
            #initialize H_A:
            H_A = new_unweighted_scores[(Y_muted==1),:]
            #Potential Python issue whereby if there is just one anomaly, weird
            #things happen with array shapes. To guard against this:
            if np.sum(Y_muted == 1) == 1:
                if np.shape(H_A) != (1,len(models)):
                    H_A = H_A.reshape(1,-1)
                
    #So, at this point, either we had no old data and w_old is still equal weights, 
    #with H_A, H_N empty arrays, or else there was old data, and H_A and H_N may 
    #have been added to (or not), while w_old may or may not have already been 
    #updated.
    
    #We next move to the batch data.
    
    #Active-LODA over the batch data.
    #
    #Reads:   X, Y, Y_muted, models, w_old, H_A, H_N, q_tau, fake_anomaly, top_k, C_tau,
    #         new_unweighted_validation_scores_AL, Y_AUC
    #Updates: w_old, H_A, H_N, q_tau, fake_anomaly, Y_lab,
    #         all_active_LODA_AUC[curr_trial, r], all_nY1_active_LODA[curr_trial, r]

    curr_L_index = n_old
    active_LODA_AUC = [0]*n_loops

    #Labels that are known before any batch arrives (NaN in Y_muted means "unlabeled"):
    which_lab = [i for i, label in enumerate(Y_muted) if not np.isnan(label)]
    Y_lab = [Y_muted[j] for j in which_lab]

    #Active-LODA is only run over the first quarter of the batches.
    n_active_loops = int(n_loops/4)
    for r in range(n_active_loops):
        print('Current loop is',r,'of',n_active_loops)

        #Slice out the next batch and advance the left index for the next loop:
        curr_R_index = curr_L_index + B
        X_new = X[curr_L_index:curr_R_index ,:]
        Y_new = Y[curr_L_index:curr_R_index]
        curr_L_index = curr_R_index

        #One column of raw scores per detector, each detector being refit on this batch.
        new_unweighted_scores = np.empty((B,len(models)))
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            #Convert (rather than reinterpret in place) to float64: assigning to
            #.dtype relabels the raw bytes and corrupts any non-float64 output.
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            new_unweighted_scores[:,i] = y_score.squeeze()

        #Column-wise min-max scaling, shifted by a tiny constant.
        #NOTE(review): the constant is added to the quotient, not to the denominator,
        #so a detector with constant scores on a batch still divides by zero - confirm
        #that this is the intended placement.
        col_min = new_unweighted_scores.min(axis=0)
        col_max = new_unweighted_scores.max(axis=0)
        new_unweighted_scores = (new_unweighted_scores - col_min) / (col_max - col_min) + 0.0000000001

        #AUC on the external validation set with the current value of w_old
        #(computed once and stored in both places):
        weighted_validation_scores = np.matmul(new_unweighted_validation_scores_AL,w_old)
        active_LODA_AUC[r] = roc_auc_score(Y_AUC,weighted_validation_scores)
        all_active_LODA_AUC[curr_trial,r] = active_LODA_AUC[r]

        #We have to attach the current versions of H_A and H_N to
        #new_unweighted_scores, since in this batch framework we do not have a fixed
        #number of data points from start to finish, like they do in Das et al.
        #(2016). If we do not do this, it will affect the calculation of q_tau over
        #time (a kind of bias would be introduced).
        #Row layout afterwards: rows 0..B-1 are the batch, then H_A, then H_N.
        new_unweighted_scores = np.concatenate([new_unweighted_scores,H_A,H_N])

        #Final active-LODA scores are a linear combination over anomaly detectors:
        new_active_LODA_scores = np.matmul(new_unweighted_scores,w_old)

        #Sort from smallest to largest, retaining the indices.
        sorted_indices = np.argsort(new_active_LODA_scores)
        sorted_scores = new_active_LODA_scores[sorted_indices]

        #Position of the quantile in the sorted scores. (For C_tau close to 1 this
        #can evaluate to -1, which indexes the largest score.)
        my_quantile_sorted_index = int(np.floor(len(new_active_LODA_scores)*(1-C_tau)))-1*(C_tau != 1)

        #Following Das et al. (2016) we would send the single highest scoring point
        #to the expert; slightly more generally we send the top_k highest scoring
        #points. Only rows with index < B belong to the current batch (the appended
        #H_A/H_N rows are already labeled), so keep those, highest score first.
        ranked_batch_indices = sorted_indices[sorted_indices < B][::-1]
        top_k_indices = ranked_batch_indices[:top_k].tolist()

        #The expert reveals the true labels of those points:
        Y_expert = Y_new[top_k_indices]
        Y_lab = Y_lab + list(Y_expert)
        nY1 = Y_lab.count(1)
        all_nY1_active_LODA[curr_trial,r] = nY1

        #While H_A only holds a stand-in ("fake") anomaly it is emptied before the
        #newly labeled points are added. Once a real anomaly has been found the
        #stand-in is never needed again, so fake_anomaly is set to 0 for good.
        if fake_anomaly==1:
            H_A = np.empty((0, len(models)))
            if np.any(Y_expert==1):
                fake_anomaly = 0

        #Append the unweighted scores of the newly labeled points to H_A / H_N:
        for batch_index, expert_label in zip(top_k_indices, Y_expert):
            labeled_row = new_unweighted_scores[batch_index:(batch_index+1),:]
            if expert_label==1:
                H_A = np.concatenate([H_A,labeled_row])
            else:
                H_N = np.concatenate([H_N,labeled_row])

        #It remains possible that all labeled data so far are nominals. In that
        #case the point whose score sits at the quantile position of the sorted
        #scores is used as a proxy for an anomaly.
        #NOTE(review): fake_anomaly is not set to 1 here, so if it was 0 on entry the
        #proxy row stays in H_A permanently - confirm this against the initialization.
        if np.shape(H_A)[0]==0:
            H_A = new_unweighted_scores[sorted_indices[my_quantile_sorted_index],:].reshape(1,-1)

        #Update the linear combination coefficients (uses q_tau from the previous loop):
        w_new = optimize_w_2(H_A, H_N, q_tau)

        #Update q_tau on this data:
        q_tau = sorted_scores[my_quantile_sorted_index]

        #The weights used in the next loop are the new ones rescaled to unit Euclidean norm:
        w_old = w_new / np.linalg.norm(w_new)


    #####################################################################################################################
    # GLAD trials:

    #Initialize some parameters:
    #  b         - constant every network output is pre-trained towards (see y_true below)
    #  mylambda  - passed through to both custom loss functions
    #  top_k     - number of points per batch whose labels are requested
    #  q_tau_tm1 - score quantile from the previous step; -10e7 is the "not computed
    #              yet" sentinel that the batch loop compares against
    b = 0.5
    mylambda = 1
    top_k = n_send
    q_tau_tm1 = -10e7

    # Build the model from the input dimension and the number of detectors:
    model_GLAD = build_neural_network(np.shape(X)[1], len(models))

    # Compile the model with the custom loss function:
    model_GLAD.compile(optimizer='adam', loss=custom_binary_crossentropy_loss(b=b,mylambda=mylambda), metrics=['accuracy'])

    # Pre-train on the first n_min points (the same n_min used earlier to find the
    # number of LODA projections) with the constant target b. The target is sized
    # from the actual slice so it still matches when X has fewer than n_min rows.
    X_pretrain = X[:n_min,:]
    y_true = np.full((np.shape(X_pretrain)[0], len(models)), b)

    model_GLAD.fit(X_pretrain, y_true, epochs=10, batch_size=32,verbose=0)

    # Sanity check: the output for each data point should all be very close to b
    # (the value is kept in `output` for inspection only):
    row = X[1:2]
    output = model_GLAD.predict(row)

    #INITIALIZATION
    #State built here and consumed by the batch loop:
    #  X_lab / Y_lab         - labeled points and their labels
    #  all_unweighted_scores - one row of raw detector scores per point seen so far
    #  all_labeled_scores    - the rows of all_unweighted_scores of the labeled points
    if n_old == 0:
        #No old data at all: everything starts empty.
        X_lab = np.empty([0,np.shape(X)[1]])
        Y_lab = []
        all_labeled_scores = np.empty([0,len(models)])
        all_unweighted_scores = np.empty([0,len(models)])

    else:
        X_old = X[:n_old,:]
        Y_old = Y_muted  #We directly use Y_muted, as in all the other methods
        which_lab = [i for i, label in enumerate(Y_old) if not np.isnan(label)]

        #Score the old data whether or not any of it is labeled: the batch loop
        #pairs all_unweighted_scores row-by-row with X[:n_old + (r+1)*B], so the
        #n_old leading rows must always be present. (Previously they were skipped
        #when no old point was labeled, which misaligned the two arrays.)
        all_unweighted_scores = np.empty([np.shape(X_old)[0],len(models)])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_old)
            #Convert (rather than reinterpret in place) to float64:
            y_score = np.asarray(model.score_samples(X_old), dtype=np.float64)
            all_unweighted_scores[:,i] = y_score.squeeze()

        #With no labeled points which_lab is empty and these become zero-row arrays / [].
        X_lab = X_old[which_lab,:]
        Y_lab = [Y_old[j] for j in which_lab]
        all_labeled_scores = all_unweighted_scores[which_lab,:]
    
    #LOOPING OVER BATCHES:
    #
    #Reads:   X, Y, models, model_GLAD, top_k, C_tau, b, mylambda, X_AUC, Y_AUC,
    #         new_unweighted_validation_scores
    #Updates: X_lab, Y_lab, all_labeled_scores, all_unweighted_scores, q_tau_tm1,
    #         all_GLAD_AUC[curr_trial, r], all_nY1_GLAD[curr_trial, r]

    def _glad_score_quantile(X_seen, unweighted_scores):
        #Score every point seen so far with the current state of the network and
        #return the order statistic at position floor(n*(1-C_tau)) - 1 (position 0
        #when C_tau == 1), found with np.partition instead of a full sort.
        weights = model_GLAD.predict(X_seen)
        final_scores = np.sum(unweighted_scores[:np.shape(X_seen)[0],:] * weights, axis=1)
        quantile_index = int(np.floor(len(final_scores) * (1 - C_tau))) - 1 * (C_tau != 1)
        return np.partition(final_scores, quantile_index)[quantile_index]

    #Deal with indices of X:
    curr_L_index = n_old

    #Initialize storage of AUC scores
    GLAD_AUC = [0]*n_loops

    #Loop through batches of new data of size B:
    for r in range(n_loops):

        curr_R_index = curr_L_index + B

        #Get next batch of data:
        X_new = X[curr_L_index:curr_R_index,:]
        Y_new = Y[curr_L_index:curr_R_index].tolist()

        # Calculate all of the scores for X_new (each detector is refit on the batch):
        all_scores = np.empty([np.shape(X_new)[0],len(models)])
        for i, (name, model) in enumerate(models.items()):
            model.fit(X_new)
            #Convert (rather than reinterpret in place) to float64: assigning to
            #.dtype relabels the raw bytes and corrupts any non-float64 output.
            y_score = np.asarray(model.score_samples(X_new), dtype=np.float64)
            all_scores[:,i] = y_score.squeeze()

        #Update:
        all_unweighted_scores = np.concatenate([all_unweighted_scores,all_scores])

        # Pass X_new through the current state of the neural network in order to get the weights out:
        curr_w = model_GLAD.predict(X_new)

        #Final score of a point = sum over detectors of (raw score * its weight):
        X_new_final_scores = np.sum(all_scores * curr_w, axis=1)

        # The top_k highest scoring points of the batch are sent for labeling. The
        # slice is taken relative to the actual batch length rather than B.
        sorted_indices = np.argsort(X_new_final_scores)
        top_k_indices = sorted_indices[max(len(sorted_indices)-top_k, 0):]

        # Add the scores of the top_k data points to all_labeled_scores:
        all_labeled_scores = np.concatenate([all_labeled_scores,all_scores[top_k_indices,:]])

        # Add the relevant data points to X_lab
        X_lab = np.concatenate([X_lab,X_new[top_k_indices,:]])

        # Get the true labels of these data points and add them to Y_lab
        expert_provided_labels = [Y_new[j] for j in top_k_indices]
        Y_lab = Y_lab + expert_provided_labels

        #Update X_so_far (all X data so far):
        X_so_far = X[:(n_old + (r+1)*B),:]

        nY0 = Y_lab.count(0)
        nY1 = Y_lab.count(1)
        print('There are ',nY0,' labeled non-anomalies and ',nY1,' labeled anomalies so far.')

        all_nY1_GLAD[curr_trial, r] = nY1

        #AUC on the external validation set, scored with the same weighted-sum rule
        #as the batch above (vectorized instead of one matmul per row):
        curr_w_ext = model_GLAD.predict(X_AUC)
        X_new_final_scores_ext = np.sum(new_unweighted_validation_scores[:np.shape(X_AUC)[0],:] * curr_w_ext, axis=1)

        GLAD_AUC[r] = roc_auc_score(Y_AUC,X_new_final_scores_ext)
        all_GLAD_AUC[curr_trial,r] = GLAD_AUC[r]

        #First time through q_tau_tm1 still holds its sentinel value, so compute it
        #from the network as it stands before this loop's training step:
        if q_tau_tm1 == -10e7:
            q_tau_tm1 = _glad_score_quantile(X_so_far, all_unweighted_scores)

        # Next, compile the model with the second custom loss function:
        # 1. Identify the indices of anomalies (Y_lab == 1)
        anomaly_indices = np.where(np.array(Y_lab) == 1)[0]

        # 2. Append 4 extra copies of every labeled anomaly (5 occurrences in total)
        #    to the labeled points, their scores, and the points seen so far
        X_lab_temp = np.vstack([X_lab] + [X_lab[anomaly_indices]] * 4)
        all_labeled_scores_temp = np.vstack([all_labeled_scores] + [all_labeled_scores[anomaly_indices]] * 4)
        X_so_far_temp = np.vstack([X_so_far] + [X_lab[anomaly_indices]] * 4)

        # 3. Extend the labels with four 1s for each anomaly, matching step 2
        Y_lab_temp = np.concatenate([Y_lab, [1] * 4 * len(anomaly_indices)])

        #The loss ignores the y_true / y_pred it is handed and instead evaluates
        #new_custom_loss_2 on the variables captured here.
        model_GLAD.compile(optimizer='adam', loss=lambda y_true, y_pred: new_custom_loss_2(X_lab_temp, Y_lab_temp, q_tau_tm1, all_labeled_scores_temp,model_GLAD, X_so_far_temp, mylambda, b), metrics=['accuracy'])

        # Train the model for further iterations (e.g., 10 epochs)
        model_GLAD.fit(X_lab_temp, np.array(Y_lab_temp), epochs=10, batch_size=32,verbose=0)

        #Refresh q_tau_tm1 with the newly trained network, for use in the next loop:
        q_tau_tm1 = _glad_score_quantile(X_so_far, all_unweighted_scores)

        #Update indices:
        curr_L_index = curr_L_index + B

# Average every tracked metric over axis 0 (the active-LODA and GLAD arrays are
# filled as [curr_trial, r], so this leaves one value per batch), keyed by the
# name under which the average is stored in the archive.
_per_trial_metrics_TS1 = {
    "avg_LODA_TS1": all_LODA_AUC,
    "avg_active_LODA_TS1": all_active_LODA_AUC,
    "avg_GLAD_TS1": all_GLAD_AUC,
    "avg_nY1_LODA_TS1": all_nY1_LODA,
    "avg_nY1_active_LODA_TS1": all_nY1_active_LODA,
    "avg_nY1_GLAD_TS1": all_nY1_GLAD,
}
_averages_TS1 = {
    key: np.mean(per_trial, axis=0)
    for key, per_trial in _per_trial_metrics_TS1.items()
}

# Keep the averages available under their usual variable names as well.
avg_LODA_TS1 = _averages_TS1["avg_LODA_TS1"]
avg_active_LODA_TS1 = _averages_TS1["avg_active_LODA_TS1"]
avg_GLAD_TS1 = _averages_TS1["avg_GLAD_TS1"]

avg_nY1_LODA_TS1 = _averages_TS1["avg_nY1_LODA_TS1"]
avg_nY1_active_LODA_TS1 = _averages_TS1["avg_nY1_active_LODA_TS1"]
avg_nY1_GLAD_TS1 = _averages_TS1["avg_nY1_GLAD_TS1"]

# Save these results:
np.savez("AUC_TS1.npz", **_averages_TS1)

# Save also the score vectors for later (for the plot below). These are whatever
# the variables hold at this point, i.e. the values left by the last loop that
# assigned them.
np.savez(
    "scores_TS1.npz",
    **{
        "weighted_scores_TS1": weighted_scores,
        "weighted_validation_scores_TS1": weighted_validation_scores,
        "X_new_final_scores_ext_TS1": X_new_final_scores_ext,
    },
)

## Load all data and plot it

In [None]:
# Results of the AAA-only run (stored in their own archive).
data_TS1_onlyAAA = np.load("AUC_TS1_onlyAAA.npz")
avg_AAA_TS1, avg_nY1_AAA_TS1 = (
    data_TS1_onlyAAA[key] for key in ("avg_AAA_TS1", "avg_nY1_AAA_TS1")
)

# Results of the LODA / active-LODA / GLAD run: average AUC per batch and
# average number of labeled anomalies per batch, for each method.
data_TS1 = np.load("AUC_TS1.npz")
(
    avg_LODA_TS1,
    avg_active_LODA_TS1,
    avg_GLAD_TS1,
    avg_nY1_LODA_TS1,
    avg_nY1_active_LODA_TS1,
    avg_nY1_GLAD_TS1,
) = (
    data_TS1[key]
    for key in (
        "avg_LODA_TS1",
        "avg_active_LODA_TS1",
        "avg_GLAD_TS1",
        "avg_nY1_LODA_TS1",
        "avg_nY1_active_LODA_TS1",
        "avg_nY1_GLAD_TS1",
    )
)

In [None]:
#Four-panel summary figure. The data are regenerated from a fixed seed so that one
#dimension of the validation series can be shown (nominals and anomalies in
#different colors) together with the score density of one detector, the average
#AUC curves and the cumulative number of anomalies found.

#Generate data and scores
np.random.seed(123456789)

# Generate the data (training series first, then the validation series)
X, Y = generate_time_series(my_tau=my_tau, d=my_d, n = n,mean_G1=mean_G1, mean_G2=mean_G2, mean_G3=mean_G3,c_anom = c_anom)
X_AUC, Y_AUC = generate_time_series(my_tau=my_tau, d=my_d, n = n_AUC,mean_G1=mean_G1, mean_G2=mean_G2, mean_G3=mean_G3,c_anom = c_anom)

# Additional detectors to include next to the LODA models
extra_models = {
    "IsolationForest": IsolationForest(),
    "OneClassSVM": OneClassSVM(),
    "EuclideanDifferenceAnomalyDetector": EuclideanDifferenceAnomalyDetector(),
    "LocalOutlierFactor": LocalOutlierFactor(novelty=True),
    "RandomScore": RandomScore(),
}
# Create all models (including the LODA models and additional ones)
models = Create_Anomaly_Models(my_d, n_LODA_models=n_LODA_models, additional_models=extra_models)

#Unweighted scores on the external validation set. The score functions are
#contextual per batch, so the raw scores are computed batch by batch (size B).
#NOTE(review): rows at or beyond B*n_loops, if any, are left uninitialized.
new_unweighted_validation_scores = np.empty((np.shape(X_AUC)[0],len(models)))
for j in range(n_loops):
    batch_rows = slice(B*j, B*(j+1))
    new_unweighted_validation_scores[batch_rows,:] = Compute_Model_Scores(X_AUC[batch_rows,:],models)


#x-axes: all batches, and the first quarter of them for active-LODA
xx = list(range(1, n_loops+1))
xxactiveLODAv2 = list(range(1, int(n_loops/4)+1))

fig, axs = plt.subplots(4, 1, figsize=(8, 16))  # 4 rows, 1 column

##################################################################################################
# First subplot: first coordinate of the first 500 validation points, colored by label
first_points = slice(0, 500)
point_colors = ['blue' if label == 0 else 'red' for label in Y_AUC[first_points]]
axs[0].scatter(range(500), X_AUC[first_points, 0], c=point_colors, s=4, marker='o')

axs[0].set_xlabel("Time",fontsize=15)
axs[0].set_ylabel("Data value",fontsize=15)
axs[0].set_title("First dimension of time series",fontsize=17)

# Custom legend entries: markers only, no connecting lines
legend_handles = [
    mlines.Line2D([], [], color=marker_color, marker='o', markersize=5, label=marker_label, linestyle='None')
    for marker_color, marker_label in (('blue', "Nominal (Y=0)"), ('red', "Anomaly (Y=1)"))
]
axs[0].legend(handles=legend_handles, loc="best",fontsize=13)

##################################################################################################
# Second subplot: class-conditional KDEs of the 8th score column (index 7), each
# scaled by its class weight (1 - my_tau for nominals, my_tau for anomalies).
# NOTE(review): the title says "first anomaly detector" while column index 7 is
# plotted - confirm which detector that column corresponds to.
diffs = new_unweighted_validation_scores[:, 7]

# Common x-axis grid spanning the observed score range
x_vals = np.linspace(np.min(diffs), np.max(diffs), 500)

for class_value, class_weight, class_label, class_color in (
    (0, 1 - my_tau, "Nominal (Y=0)", "blue"),
    (1, my_tau, "Anomaly (Y=1)", "red"),
):
    class_diffs = diffs[Y_AUC == class_value]
    # A KDE is only fitted when the class has more than one point
    if len(class_diffs) > 1:
        class_density = gaussian_kde(class_diffs)(x_vals) * class_weight
        axs[1].plot(x_vals, class_density, label=class_label, color=class_color)

axs[1].set_title(" Weighted 'score density' function for the first anomaly detector",fontsize=17)
axs[1].set_xlabel("Absolute difference anomaly score",fontsize=15)
axs[1].set_ylabel("Density",fontsize=15)
axs[1].legend(fontsize=13)
axs[1].grid(True)
axs[1].set_ylim(0, 0.5)  # Fixed y-axis limits

##################################################################################
# Third subplot: average AUC per batch for each method
for batches, avg_auc, method in (
    (xx, avg_LODA_TS1, "LODA"),
    (xxactiveLODAv2, avg_active_LODA_TS1, "Active-LODA"),
    (xx, avg_GLAD_TS1, "GLAD"),
    (xx, avg_AAA_TS1, "AAA"),
):
    axs[2].plot(batches, avg_auc, label=method, linewidth=1.5)

axs[2].set_xlabel("Batch",fontsize=15)
axs[2].set_ylabel("Average AUC",fontsize=15)
axs[2].set_title("Average AUC over time",fontsize=17)
axs[2].legend(loc="best", fontsize=13)
axs[2].grid(True)

#############################
# Fourth subplot: average cumulative number of labeled anomalies per batch
for batches, avg_found, method in (
    (xx, avg_nY1_LODA_TS1, "LODA"),
    (xxactiveLODAv2, avg_nY1_active_LODA_TS1, "Active-LODA"),
    (xx, avg_nY1_GLAD_TS1, "GLAD"),
    (xx, avg_nY1_AAA_TS1, "AAA"),
):
    axs[3].plot(batches, avg_found, label=method, linewidth=1.5)

axs[3].set_xlabel("Batch",fontsize=15)
axs[3].set_ylabel("Anomalies detected",fontsize=15)
axs[3].set_title("Cumulative anomalies detected",fontsize=17)
axs[3].legend(fontsize=13)
axs[3].grid(True, linestyle='--', alpha=0.6)

# Adjust layout to prevent overlap
plt.tight_layout()

# Save the figure to PDF
fig.savefig("ContextualScores2.pdf", format="pdf", bbox_inches="tight")

# Show all plots at once
plt.show()