# Imports

In [None]:
import sys
import numpy as np
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn import preprocessing
from scipy import stats
from scipy.stats import ks_2samp
import pandas as pd
from scipy.io.arff import loadarff 
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder
import time
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold
from numpy import mean
from category_encoders import TargetEncoder
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer, StandardScaler
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import math
from river import drift


# Data Modeling

In [None]:
def read_dataset(path, label):
    """Load a data set and split it into features and integer-encoded labels.

    The file format is chosen from the path: a path containing
    ``'real-world'`` is read as CSV, a path containing ``'synthetic'`` is
    read as ARFF.

    Parameters
    ----------
    path : str
        Location of the data file.
    label : str
        Name of the column that holds the class label.

    Returns
    -------
    features : pandas.DataFrame
        Every column except ``label``.
    labels : numpy.ndarray
        The label column encoded by ``LabelEncoder``.

    Raises
    ------
    ValueError
        If the path contains neither marker or the label column is missing.
    """
    # 'real-world' is tested first so that it keeps precedence when a path
    # happens to contain both markers.
    if 'real-world' in path:
        dataframe = pd.read_csv(path)
    elif 'synthetic' in path:
        data = loadarff(path)
        dataframe = pd.DataFrame(data[0])
    else:
        raise ValueError(
            "cannot infer the file format of %r: the path must contain "
            "'synthetic' (ARFF) or 'real-world' (CSV)" % (path,)
        )

    if label not in dataframe.columns:
        raise ValueError("label column %r not found in %r" % (label, path))

    features = dataframe.loc[:, dataframe.columns != label]
    # Pass a 1-D column: LabelEncoder expects a 1-D target, not an (n, 1) frame.
    le = preprocessing.LabelEncoder()
    labels = le.fit_transform(dataframe[label])
    return features, labels
    

In [None]:
def prepare_features(features, training_labels, size_training, features_encoder, features_scaler):
    """Encode categorical columns and optionally min-max scale all features.

    Parameters
    ----------
    features : pandas.DataFrame
        Training rows followed by testing rows.
    training_labels : array-like
        Labels of the first ``size_training`` rows; only used to fit a
        ``TargetEncoder``.
    size_training : int
        Number of leading rows that form the training set.
    features_encoder : OneHotEncoder, OrdinalEncoder, TargetEncoder or None
        ``None`` drops every non-numeric column. One-hot and ordinal encoders
        are fitted on all rows; the target encoder is fitted on the training
        rows only and then applied to the remaining rows.
    features_scaler : bool
        When ``True`` a ``MinMaxScaler`` is fitted on the training rows and
        applied to all rows.

    Returns
    -------
    pandas.DataFrame
        Numeric columns followed by the encoded categorical columns, carrying
        the row index of ``features``.

    Raises
    ------
    TypeError
        If categorical columns exist and the encoder type is not supported.
    """
    numeric_features = features._get_numeric_data()

    if features_encoder is None:
        features = numeric_features
    else:
        numeric_names = set(numeric_features.columns)
        # Keep the source column order; a set difference would make the
        # feature order depend on string hashing.
        categorical_cols = [col for col in features.columns if col not in numeric_names]
        categorical = features[categorical_cols]

        if not categorical_cols:
            # Nothing to encode: avoid fitting an encoder on zero columns.
            encoded_features = pd.DataFrame(index=features.index)
        elif isinstance(features_encoder, OneHotEncoder):
            feature_arr = features_encoder.fit_transform(categorical)
            if hasattr(feature_arr, 'toarray'):
                feature_arr = feature_arr.toarray()
            # get_feature_names was replaced by get_feature_names_out in
            # newer scikit-learn releases; support both.
            if hasattr(features_encoder, 'get_feature_names_out'):
                feature_labels = features_encoder.get_feature_names_out(categorical_cols)
            else:
                feature_labels = features_encoder.get_feature_names(categorical_cols)
            encoded_features = pd.DataFrame(feature_arr, columns=feature_labels, index=features.index)
        elif isinstance(features_encoder, OrdinalEncoder):
            feature_arr = features_encoder.fit_transform(categorical)
            encoded_features = pd.DataFrame(feature_arr, columns=categorical_cols, index=features.index)
        elif isinstance(features_encoder, TargetEncoder):
            training_encoded = pd.DataFrame(
                features_encoder.fit_transform(categorical.iloc[:size_training], training_labels),
                columns=categorical_cols,
            )
            testing_encoded = pd.DataFrame(
                features_encoder.transform(categorical.iloc[size_training:]),
                columns=categorical_cols,
            )
            # DataFrame.append no longer exists in pandas 2.x.
            encoded_features = pd.concat([training_encoded, testing_encoded])
        else:
            raise TypeError(
                'unsupported features encoder: ' + features_encoder.__class__.__name__
            )

        features = numeric_features.join(encoded_features)

    if features_scaler is True:
        scaler = MinMaxScaler()
        scaled_parts = [scaler.fit_transform(features.iloc[:size_training])]
        testing_part = features.iloc[size_training:]
        if len(testing_part) > 0:
            scaled_parts.append(scaler.transform(testing_part))
        features = pd.DataFrame(
            np.vstack(scaled_parts), columns=features.columns, index=features.index
        )

    if features_encoder is not None:
        print('categorical features encoder', features_encoder.__class__.__name__)
    else:
        print('no features encoder')
    if features_scaler is True:
        print('features scaled using MinMaxScaler')
    else:
        print('features are not scaled')

    return features

In [None]:
def get_training_data(features, labels, size_training):
    """Return ``[features, labels]`` restricted to the first ``size_training`` rows.

    ``features`` is sliced positionally with ``iloc``; ``labels`` is sliced
    with plain sequence slicing.
    """
    head = slice(None, size_training)
    return [features.iloc[head], labels[head]]

In [None]:
def learn_classifier(training_features, training_labels):
    """Fit and return a 100-tree random forest on the training data."""
    # An SVC with probability estimates (svm.SVC(probability=True)) was tried
    # as an alternative model; the random forest is the one in use.
    forest = RandomForestClassifier(n_estimators=100)
    return forest.fit(training_features, training_labels)

In [None]:
# Cross-validated accuracy of a model
def evaluate_model(cv, X, y, model):
    """Return the mean, minimum and maximum cross-validated accuracy.

    ``cv`` is handed to ``cross_val_score`` as the splitting strategy; the
    folds are evaluated with ``n_jobs=-1``.
    """
    fold_accuracies = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
    lowest = fold_accuracies.min()
    highest = fold_accuracies.max()
    return mean(fold_accuracies), lowest, highest

# Drift Detection

## UDetect

In [None]:
def window_summary(features):
    """Summarise a window by its spread around the centroid.

    The Euclidean distance of every row to the column-wise mean is summed and
    divided by ``len(features) - 1``.
    """
    n_rows = len(features)
    center = (features.sum(axis=0) / n_rows).values
    rows = features.values
    # Accumulate sequentially, starting from 0, one row at a time.
    total_distance = sum(np.linalg.norm(row - center) for row in rows)
    return total_distance / (len(rows) - 1)

In [None]:
def change_parameter(summary_arr, a2=0.1):
    """Derive control limits from the training subgroup summaries.

    The limits are ``mean ± a2 * R_mean`` where ``R_mean`` is the average
    absolute difference between consecutive summaries.

    Parameters
    ----------
    summary_arr : sequence of float
        One summary value per training subgroup, in order.
    a2 : float, optional
        Multiplier applied to ``R_mean``; defaults to the value 0.1 that was
        previously hard-coded.

    Returns
    -------
    tuple
        ``(training_mean, UCL, LCL)``.

    Raises
    ------
    ValueError
        If fewer than two summaries are given, because no consecutive
        difference exists to estimate ``R_mean`` from.
    """
    summaries = np.asarray(summary_arr, dtype=float)
    if summaries.size < 2:
        raise ValueError('at least two subgroup summaries are required to compute control limits')

    training_mean = summaries.mean()
    R_mean = np.abs(np.diff(summaries)).mean()
    margin = a2 * R_mean
    LCL = training_mean - margin
    UCL = training_mean + margin
    print('lcl, ucl', LCL, UCL)
    return training_mean, UCL, LCL

In [None]:
def _udetect_subgroup_summaries(frame, start, stop, size_subgroup):
    """Return one ``window_summary`` per subgroup of ``frame.iloc[start:stop]``."""
    summaries = []
    for begin in range(start, stop, size_subgroup):
        # Clip the window at `stop` so a subgroup never reaches into the rows
        # of the following batch.
        window = frame.iloc[begin:min(begin + size_subgroup, stop)]
        # A window with fewer than two rows has no spread estimate
        # (window_summary divides by len - 1), so it is left out.
        if len(window) < 2:
            continue
        summaries.append(window_summary(window))
    return summaries


def udetect_all_batches(training_data, testing_features, size_batch, encoder, scaler, size_subgroup=25):
    """Run the UDetect check on every testing batch and plot the outcome.

    Control limits are derived from subgroup summaries of the training rows.
    Each testing batch is summarised by the mean of its subgroup summaries and
    is flagged when that mean lies outside the limits.

    Parameters
    ----------
    training_data : sequence
        ``[training_features, training_labels]``.
    testing_features : pandas.DataFrame
        Rows that follow the training rows.
    size_batch : int
        Number of rows per testing batch.
    encoder, scaler
        Forwarded to ``prepare_features``.
    size_subgroup : int, optional
        Number of rows per subgroup; defaults to 25.

    Returns
    -------
    list of int
        1-based numbers of the batches flagged as drifted.
    """
    training_features = training_data[0]
    training_labels = training_data[1]
    size_training = len(training_features)
    # DataFrame.append no longer exists in pandas 2.x.
    all_features = pd.concat([training_features, testing_features])
    all_features_ready = prepare_features(all_features, training_labels, size_training, encoder, scaler)
    training_features = all_features_ready.iloc[0:size_training, :]
    testing_features = all_features_ready.iloc[size_training:, ]

    training_summaries = _udetect_subgroup_summaries(training_features, 0, size_training, size_subgroup)
    training_mean, UCL_Ed, LCL_Ed = change_parameter(training_summaries)

    size_testing = len(testing_features)
    testing_summaries = []
    for batch_start in range(0, size_testing, size_batch):
        batch_stop = min(batch_start + size_batch, size_testing)
        batch_summaries = _udetect_subgroup_summaries(testing_features, batch_start, batch_stop, size_subgroup)
        testing_summaries.append(np.mean(batch_summaries))

    def outside_limits(summary):
        return summary < LCL_Ed or summary > UCL_Ed

    detected_batches = [
        batch_number
        for batch_number, summary in enumerate(testing_summaries, start=1)
        if outside_limits(summary)
    ]

    if detected_batches:
        print('detected!')
    else:
        print('no concept drift detected')
    print('in batches:', detected_batches)

    fig, ax = plt.subplots()
    # One bar per computed summary keeps x and height the same length even
    # when the testing rows are not a multiple of size_batch.
    indices = np.arange(1, len(testing_summaries) + 1)
    plt.axhline(y=UCL_Ed, label='UCL', color='red')
    plt.axhline(y=training_mean, label='training mean', color='green')
    plt.axhline(y=LCL_Ed, label='LCL', color='yellow')
    colors = ['red' if outside_limits(summary) else 'green' for summary in testing_summaries]
    ax.bar(indices, np.array(testing_summaries), color=colors)
    plt.ylim(0.9 * LCL_Ed, 1.1 * UCL_Ed)
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()
    print()

    return detected_batches

In [None]:
def udetect_one_batch(training_data, testing_features, size_batch, encoder, scaler):
    """Run the UDetect check by delegating to ``udetect_all_batches``.

    The previous body duplicated ``udetect_all_batches`` but never defined
    ``size_subgroup``, so every call failed with ``NameError``. Delegating
    removes the duplicate and the failure. The only visible difference from
    the intended behaviour of the old body is the extra ``'in batches:'``
    line that ``udetect_all_batches`` prints.

    Parameters
    ----------
    training_data : sequence
        ``[training_features, training_labels]``.
    testing_features : pandas.DataFrame
        Rows that follow the training rows.
    size_batch : int
        Number of rows per testing batch.
    encoder, scaler
        Forwarded unchanged.

    Returns
    -------
    list of int
        1-based numbers of the batches flagged as drifted.
    """
    return udetect_all_batches(training_data, testing_features, size_batch, encoder, scaler)

## SQSI

In [None]:
def compute_scores_training_set_sqsi(classifier, training_features, training_labels):
    """Return out-of-fold class probabilities for the training rows.

    Uses a shuffled 20-fold split and ``cross_val_predict`` with
    ``method='predict_proba'``. No seed is set, so the split differs between
    calls.
    """
    folds = KFold(n_splits=20, shuffle=True)
    return cross_val_predict(
        classifier,
        training_features,
        training_labels,
        cv=folds,
        n_jobs=-1,
        method='predict_proba',
    )

In [None]:
def sqsi_all_batches(training_data, testing_features, size_batch, encoder, scaler, alpha=0.001):
    """Run the SQSI check on every testing batch and plot the outcome.

    A classifier is trained on the training rows. For each testing batch the
    predicted probabilities of the class in column 1 are compared with the
    cross-validated training probabilities using a two-sample
    Kolmogorov-Smirnov test; a batch is flagged when the p-value is below
    ``alpha``.

    Parameters
    ----------
    training_data : sequence
        ``[training_features, training_labels]``.
    testing_features : pandas.DataFrame
        Rows that follow the training rows.
    size_batch : int
        Number of rows per testing batch.
    encoder, scaler
        Forwarded to ``prepare_features``.
    alpha : float, optional
        Significance level of the test; defaults to 0.001.

    Returns
    -------
    list of int
        1-based numbers of the batches flagged as drifted.
    """
    training_features = training_data[0]
    training_labels = training_data[1]
    size_training = len(training_features)
    # DataFrame.append no longer exists in pandas 2.x.
    all_features = pd.concat([training_features, testing_features])
    all_features_ready = prepare_features(all_features, training_labels, size_training, encoder, scaler)
    training_features = all_features_ready.iloc[0:size_training, :]
    testing_features = all_features_ready.iloc[size_training:, ]

    classifier = learn_classifier(training_features, training_labels)
    scores_training_set = compute_scores_training_set_sqsi(classifier, training_features, training_labels)
    # The training scores do not change between batches, so slice them once.
    probs_training = scores_training_set[:, 1]

    detected_batches = []
    total_batches = 0
    for batch_start in range(0, len(testing_features), size_batch):
        total_batches += 1
        batch = testing_features.iloc[batch_start:batch_start + size_batch]
        probs_batch = classifier.predict_proba(batch)[:, 1]
        p_value = stats.ks_2samp(probs_batch, probs_training)[1]
        if p_value < alpha:
            detected_batches.append(total_batches)

    if detected_batches:
        print('detected!')
    else:
        print('no concept drift detected')

    fig, ax = plt.subplots()
    indices = np.arange(1, total_batches + 1)
    flagged = set(detected_batches)
    colors = ['red' if batch_number in flagged else 'green' for batch_number in indices]
    ax.bar(indices, np.ones(total_batches), color=colors)
    plt.show()
    print()

    return detected_batches

In [None]:
def sqsi_one_batch(training_data, testing_features, encoder, scaler):
    """Run the SQSI check with all testing rows treated as a single batch.

    The predicted probabilities of the class in column 1 for the testing rows
    are compared with the cross-validated training probabilities using a
    two-sample Kolmogorov-Smirnov test at significance level 0.001.

    Parameters
    ----------
    training_data : sequence
        ``[training_features, training_labels]``.
    testing_features : pandas.DataFrame
        Rows that follow the training rows.
    encoder, scaler
        Forwarded to ``prepare_features``.

    Returns
    -------
    bool
        ``True`` when drift is detected.
    """
    training_features = training_data[0]
    training_labels = training_data[1]
    size_training = len(training_features)
    # DataFrame.append no longer exists in pandas 2.x.
    all_features = pd.concat([training_features, testing_features])
    all_features_ready = prepare_features(all_features, training_labels, size_training, encoder, scaler)
    training_features = all_features_ready.iloc[0:size_training, :]
    testing_features = all_features_ready.iloc[size_training:, ]

    classifier = learn_classifier(training_features, training_labels)
    scores_training_set = compute_scores_training_set_sqsi(classifier, training_features, training_labels)

    probs_batch = classifier.predict_proba(testing_features)[:, 1]
    probs_training = scores_training_set[:, 1]
    p_value = stats.ks_2samp(probs_batch, probs_training)[1]

    # Convert to a plain bool so callers do not receive a numpy scalar.
    detected = bool(p_value < 0.001)
    if detected:
        print('detected!')
    else:
        print('no concept drift detected')

    return detected

## Label-dependent

In [None]:
def label_dependent_check_drift(training_data, testing_features, labels, size_batch, encoder, scaler, detector):
    """Feed per-row prediction errors to a drift detector, batch by batch.

    A classifier is trained on the training rows. For each testing batch every
    row contributes 1 when the prediction differs from the true label and 0
    otherwise. The batch is flagged when ``detector.drift_detected`` is set
    after any update within the batch. The detector is not reset between
    batches.

    Parameters
    ----------
    training_data : sequence
        ``[training_features, training_labels]``.
    testing_features : pandas.DataFrame
        Rows that follow the training rows.
    labels : array-like
        Labels of the whole data set; the labels of the testing rows are read
        starting at offset ``len(training_features)``.
    size_batch : int
        Number of rows per testing batch.
    encoder, scaler
        Forwarded to ``prepare_features``.
    detector
        Object exposing ``update(value)`` and ``drift_detected``.

    Returns
    -------
    list of int
        1-based numbers of the batches flagged as drifted.
    """
    training_features = training_data[0]
    training_labels = training_data[1]
    size_training = len(training_features)
    # DataFrame.append no longer exists in pandas 2.x.
    all_features = pd.concat([training_features, testing_features])
    all_features_ready = prepare_features(all_features, training_labels, size_training, encoder, scaler)
    training_features = all_features_ready.iloc[0:size_training, :]
    testing_features = all_features_ready.iloc[size_training:, ]

    classifier = learn_classifier(training_features, training_labels)
    detected_batches = []
    total_batches = 0
    for batch_start in range(0, len(testing_features), size_batch):
        total_batches += 1
        batch = testing_features.iloc[batch_start:batch_start + size_batch]
        y_predicted = classifier.predict(batch)
        y_true = labels[size_training + batch_start:size_training + batch_start + size_batch]
        # 0/1 error indicator. For two classes this equals |pred - true|, and
        # it stays a valid indicator when there are more than two classes.
        errors = (np.asarray(y_predicted) != np.asarray(y_true)).astype(int)

        detected = False
        for error in errors:
            detector.update(int(error))
            if detector.drift_detected:
                detected = True
        if detected:
            detected_batches.append(total_batches)

    fig, ax = plt.subplots()
    indices = np.arange(1, total_batches + 1)
    flagged = set(detected_batches)
    colors = ['red' if batch_number in flagged else 'green' for batch_number in indices]
    ax.bar(indices, np.ones(total_batches), color=colors)
    plt.show()
    print()

    return detected_batches

# Evaluation

## Synthetic data sets

In [None]:
def evaluate_runs_synthetic(actual_batch, predicted_batches, size_testing_batches):
    """Compute the mean latency and false-positive rate over detection runs.

    For each run, using only its first reported batch:

    * latency is 1 when nothing was reported, 0 when the first report lies
      before ``actual_batch``, and otherwise
      ``(first - actual_batch) / (size_testing_batches - actual_batch + 1)``;
    * the false-positive rate is the number of reported batches before
      ``actual_batch`` divided by ``actual_batch - 1``.

    Parameters
    ----------
    actual_batch : int
        1-based number of the batch in which the drift starts.
    predicted_batches : list of list of int
        One list of reported batch numbers per run.
    size_testing_batches : int or float
        Total number of testing batches.

    Returns
    -------
    list
        ``[mean latency, mean false-positive rate]``.
    """
    print('predicted batches:', predicted_batches)

    batches_before_drift = actual_batch - 1
    latency = np.zeros(len(predicted_batches))
    false_positives_rates = np.zeros(len(predicted_batches))

    for run_index, current_run in enumerate(predicted_batches):
        false_positives = sum(1 for batch in current_run if batch < actual_batch)

        if len(current_run) == 0:
            current_latency = 1
        elif current_run[0] < actual_batch:
            current_latency = 0
        else:
            current_latency = (current_run[0] - actual_batch) / (size_testing_batches - actual_batch + 1)

        # With the drift in the first batch there is no drift-free batch, so
        # no false positive is possible; avoid dividing by zero.
        if batches_before_drift > 0:
            false_positives_rates[run_index] = false_positives / batches_before_drift
        else:
            false_positives_rates[run_index] = 0.0
        latency[run_index] = current_latency

    mean_latency = np.mean(latency)
    mean_false_positives_rate = np.mean(false_positives_rates)
    print("latency:", mean_latency)
    print("false positives rate:", mean_false_positives_rate)
    return [mean_latency, mean_false_positives_rate]

In [None]:
def run_tests_synthetic(path, label, size_dataset, size_training, size_batch, drift_start):
    """Run SQSI and UDetect on one synthetic data set and print the metrics.

    The data set is trimmed so that the testing part consists of whole
    batches. The batch containing ``drift_start`` is the reference for
    latency and false-positive rate. Which encoders and scalers are tried
    depends on whether the path contains ``'sea_1'`` and/or ``'gradual'``.
    """
    started_at = time.time()
    print('dataset:', path)
    # Keep only whole testing batches.
    whole_batches = int((size_dataset - size_training) / size_batch)
    size_dataset = size_training + whole_batches * size_batch
    print(f'size dataset: {size_dataset}, size training: {size_training}, size testing batch: {size_batch}')

    actual_batch = int((drift_start - size_training) / size_batch) + 1
    print('actual concept drift is in batch', actual_batch)
    print()
    predicted_batches = []
    number_testing_batches = (size_dataset - size_training) / size_batch
    features, labels = read_dataset(path, label)

    training_data = get_training_data(features, labels, size_training)
    testing_features = features.iloc[size_training:size_dataset, :]

    is_sea = 'sea_1' in path
    is_gradual = 'gradual' in path

    # SQSI: 'gradual' takes precedence over 'sea_1' for the encoder choice.
    print("SQSI")
    if is_gradual:
        sqsi_encoders = [OrdinalEncoder()]
    elif is_sea:
        sqsi_encoders = [None]
    else:
        sqsi_encoders = [None, OrdinalEncoder(), TargetEncoder(), OneHotEncoder()]

    for encoder in sqsi_encoders:
        for scaler in (False, True):
            drifted = sqsi_all_batches(training_data, testing_features, size_batch, encoder, scaler)
            predicted_batches.append(drifted)
            latency, fpr_s = evaluate_runs_synthetic(actual_batch, [drifted], number_testing_batches)
            print('latency', f'{latency:.8f}')
            print('false positive rate', f'{fpr_s:.8f}')
            print()

    # UDetect: same encoder choice; scaling is switched off for the
    # 'sea_1' and 'gradual' data sets and on otherwise.
    print("UDetect")
    if is_gradual:
        udetect_encoders = [OrdinalEncoder()]
    elif is_sea:
        udetect_encoders = [None]
    else:
        udetect_encoders = [None, OrdinalEncoder(), TargetEncoder(), OneHotEncoder()]
    udetect_scalers = [False] if (is_sea or is_gradual) else [True]

    for encoder in udetect_encoders:
        for scaler in udetect_scalers:
            drifted = udetect_all_batches(training_data, testing_features, size_batch, encoder, scaler)
            predicted_batches.append(drifted)
            # Called for its printed report; the returned metrics are not used here.
            evaluate_runs_synthetic(actual_batch, [drifted], number_testing_batches)

            print()

    elapsed_minutes = int((time.time() - started_at) / 60)
    print(f"duration of test: {elapsed_minutes} minutes")
    print()
    print()


### Run tests on synthetic data!

In [None]:
# Synthetic ARFF data sets: three with abrupt drift, then the gradual-drift
# variants of each generator.
datasets = [
    'synthetic/abrupt_drift/sea_1_abrupt_drift_0_noise_balanced.arff',
    'synthetic/abrupt_drift/agraw1_1_abrupt_drift_0_noise_balanced.arff',
    'synthetic/abrupt_drift/agraw2_1_abrupt_drift_0_noise_balanced.arff',
    'synthetic/gradual_drift/sea_1_gradual_drift_0_noise_balanced_05.arff',
    'synthetic/gradual_drift/sea_1_gradual_drift_0_noise_balanced_1.arff',
    'synthetic/gradual_drift/sea_1_gradual_drift_0_noise_balanced_5.arff',
    'synthetic/gradual_drift/sea_1_gradual_drift_0_noise_balanced_10.arff',
    'synthetic/gradual_drift/sea_1_gradual_drift_0_noise_balanced_20.arff',
    'synthetic/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_05.arff',
    'synthetic/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_1.arff',
    'synthetic/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_5.arff',
    'synthetic/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_10.arff',
    'synthetic/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_20.arff',
    'synthetic/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_05.arff',
    'synthetic/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_1.arff',
    'synthetic/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_5.arff',
    'synthetic/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_10.arff',
    'synthetic/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_20.arff',
]
length_datasets = len(datasets)
# Every data set: 100000 rows, 30000 for training, batches of 10000 rows,
# drift starting at row 55000.
for i, dataset_path in enumerate(datasets):
    run_tests_synthetic(dataset_path, 'class', 100000, 30000, 10000, 55000)


## Real-world data sets

In [None]:
def evaluate_runs_real_world(reference_drifted_batches, predicted_batches, number_batches):
    """Compute the mean false-positive rate and drift detection rate.

    For each run, the drift detection rate (DDR) is the number of reported
    batches that appear in the reference divided by the size of the
    reference (0 when the reference is empty). The false-positive rate
    (FPR_R) is the number of reported batches not in the reference divided
    by the number of batches that are not in the reference.

    Parameters
    ----------
    reference_drifted_batches : list of int
        Batch numbers considered drifted.
    predicted_batches : list of list of int
        One list of reported batch numbers per run.
    number_batches : int or float
        Total number of testing batches.

    Returns
    -------
    list
        ``[FPR_R, DDR]``, each averaged over the runs.
    """
    print('predicted batches:', predicted_batches)
    print('number batches', number_batches)

    reference = set(reference_drifted_batches)
    reference_count = len(reference_drifted_batches)
    drift_free_count = number_batches - reference_count

    drift_detection_rates = np.zeros(len(predicted_batches))
    false_positives_rates = np.zeros(len(predicted_batches))

    for run_index, current_run in enumerate(predicted_batches):
        correctly_detected = sum(1 for batch in current_run if batch in reference)
        incorrectly_detected = len(current_run) - correctly_detected

        if reference_count == 0:
            drift_detection_rates[run_index] = 0
        else:
            drift_detection_rates[run_index] = correctly_detected / reference_count

        # When every batch is a reference batch there is nothing that could
        # be a false positive; avoid dividing by zero.
        if drift_free_count > 0:
            false_positives_rates[run_index] = incorrectly_detected / drift_free_count
        else:
            false_positives_rates[run_index] = 0.0

    DDR = np.mean(drift_detection_rates)
    FPR_R = np.mean(false_positives_rates)
    print("DDR:", DDR)
    print("FPR_R:", FPR_R)
    return [FPR_R, DDR]

In [None]:
def _reference_seq_for_encoder(path, encoder, default_reference):
    """Return the sequential-split reference batches that match ``encoder``.

    Only the airline data set has encoder-specific references; every other
    data set uses ``default_reference``.
    """
    if 'airline' not in path:
        return default_reference
    if isinstance(encoder, OneHotEncoder):
        return [2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21]
    if isinstance(encoder, OrdinalEncoder):
        return [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21]
    if isinstance(encoder, TargetEncoder):
        return [2]
    return default_reference


def _report_real_world_run(path, detected_batches, reference_seq, reference_time, number_batches):
    """Print the evaluation of one detector run against the reference splits."""
    print('sequential split')
    evaluate_runs_real_world(reference_seq, [detected_batches], number_batches)
    # No time-based evaluation is run for the airline data set.
    if 'airline' not in path:
        print('time-based split')
        evaluate_runs_real_world(reference_time, [detected_batches], number_batches)
    print()


def run_tests_real_world(path, label, size_dataset, size_training, size_batch):
    """Run UDetect, SQSI and the label-dependent detectors on a real-world data set.

    The data set is trimmed so that the testing part consists of whole
    batches. The encoders, the scaling flag and the reference drift batches
    are chosen from the data set named in ``path`` (``'spam'``,
    ``'weather'``, ``'elect'`` or ``'airline'``) and, where listed, from
    ``size_batch``. Every detector is run once per encoder/scaler combination
    and evaluated with ``evaluate_runs_real_world``.

    Parameters
    ----------
    path : str
        Location of the CSV file; also selects the configuration.
    label : str
        Name of the label column.
    size_dataset : int
        Number of rows to use at most.
    size_training : int
        Number of leading rows used for training.
    size_batch : int
        Number of rows per testing batch.
    """
    start = time.time()
    print('dataset:', path)
    # Keep only whole testing batches.
    size_dataset = size_training + int((size_dataset - size_training) / size_batch) * size_batch
    print('size dataset: ' + str(size_dataset) + ', size training: ' + str(size_training) + ', size testing batch: ' + str(size_batch))

    print()
    number_testing_batches = (size_dataset - size_training) / size_batch
    features, labels = read_dataset(path, label)

    training_data = get_training_data(features, labels, size_training)
    testing_features = features.iloc[size_training:size_dataset, :]
    reference_drifted_batches_seq = []
    reference_drifted_batches_time = []

    scalers = []
    encoders = []

    if('spam' in path):
        scalers = [False]
        encoders = [None]
        if(size_batch == 100):
            reference_drifted_batches_seq  = [7, 8, 10, 11, 12, 13, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
            reference_drifted_batches_time = [3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
        if(size_batch == 50):
            reference_drifted_batches_seq  = [11, 14, 15, 16, 17, 19, 20, 21, 22, 23, 25, 27, 29, 30, 31, 32, 33, 34, 35, 37, 40, 41, 42, 44, 45, 46, 47, 48, 50, 51, 52, 53, 55, 56, 57, 58]
            reference_drifted_batches_time = [1, 3, 4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58]
        if(size_batch == 20): 
            reference_drifted_batches_seq  = [10, 14, 21, 26, 32, 33, 35, 36, 37, 39, 40, 41, 42, 47, 49, 50, 51, 52, 53, 54, 56, 57, 58, 60, 61, 62, 63, 67, 68, 72, 73, 76, 78, 80, 81, 82, 83, 85, 86, 87, 88, 91, 92, 93, 95, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 114, 115, 116, 117, 118, 119, 120, 123, 124, 125, 126, 127, 128, 130, 131, 132, 135, 136, 137, 138, 139, 141, 142, 143, 145]
            reference_drifted_batches_time = [2, 10, 11, 14, 17, 21, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 42, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 68, 70, 72, 73, 74, 76, 77, 78, 79, 80, 82, 83, 85, 86, 87, 88, 90, 92, 93, 95, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145]
            
    if('weather' in path):
        scalers = [True]
        encoders = [None]
        if(size_batch == 365):    
            reference_drifted_batches_seq  = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33]
            reference_drifted_batches_time = [2, 3, 4, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 28, 29, 30, 32, 33]
        if(size_batch == 30):
            reference_drifted_batches_seq  = [6, 8, 11, 12, 16, 17, 18, 19, 20, 21, 27, 28, 29, 30, 31, 32, 39, 41, 42, 43, 44, 49, 50, 51, 52, 53, 54, 55, 58, 61, 62, 63, 64, 65, 66, 67, 68, 69, 73, 74, 75, 76, 77, 78, 79, 80, 81, 86, 87, 88, 89, 90, 91, 92, 93, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 236, 239, 245, 247, 248, 249, 250, 251, 254, 262, 263, 264, 265, 266, 269, 271, 272, 273, 275, 276, 277, 278, 279, 281, 283, 284, 285, 286, 287, 288, 290, 291, 292, 297, 299, 302, 305, 306, 307, 308, 309, 311, 312, 313, 314, 315, 316, 319, 322, 324, 330, 332, 333, 334, 335, 336, 338, 339, 340, 342, 343, 346, 347, 350, 351, 352, 356, 357, 358, 359, 360, 361, 362, 363, 367, 372, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 396, 397, 399, 400, 403]
            reference_drifted_batches_time = [1, 10, 11, 12, 13, 14, 18, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 43, 44, 45, 46, 47, 48, 49, 50, 56, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 76, 79, 80, 81, 83, 84, 85, 92, 95, 96, 106, 107, 108, 117, 125, 126, 127, 129, 130, 132, 133, 134, 135, 140, 141, 143, 144, 145, 146, 147, 148, 149, 150, 152, 153, 154, 155, 156, 157, 160, 161, 162, 164, 165, 166, 167, 172, 175, 176, 177, 178, 179, 180, 182, 185, 186, 188, 189, 190, 191, 192, 194, 195, 196, 199, 200, 201, 202, 203, 205, 206, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 233, 235, 238, 239, 244, 246, 247, 248, 249, 250, 251, 254, 255, 256, 257, 259, 260, 261, 262, 263, 264, 265, 266, 267, 269, 271, 272, 273, 274, 275, 276, 277, 278, 279, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 295, 297, 298, 299, 302, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 319, 320, 322, 324, 325, 330, 332, 333, 334, 335, 336, 337, 338, 339, 340, 342, 343, 346, 347, 349, 350, 351, 352, 355, 356, 357, 358, 359, 360, 361, 362, 363, 367, 372, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 391, 395, 396, 397, 399, 400, 401, 402, 403]
       
    if('elect' in path):
        scalers = [True]
        encoders = [None]
        if(size_batch == 365):
            reference_drifted_batches_seq = [2, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 81, 82]
            reference_drifted_batches_time = [2, 3, 4, 5, 6, 8, 9, 10, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82]
    
    if('airline' in path):
        scalers = [True]
        encoders = [OrdinalEncoder(), OneHotEncoder(), TargetEncoder()]
        reference_drifted_batches_seq  = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
        reference_drifted_batches_time = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]

    # Run the UDetect detector once per encoder/scaler combination. The call
    # sits inside the scaler loop so that every configured scaler is used.
    print("UDetect")
    for encoder in encoders:
        for scaler in scalers:
            reference_seq = _reference_seq_for_encoder(path, encoder, reference_drifted_batches_seq)
            udetect_drifted_batches = udetect_all_batches(training_data, testing_features, size_batch, encoder, scaler)
            _report_real_world_run(path, udetect_drifted_batches, reference_seq, reference_drifted_batches_time, number_testing_batches)

    # Run the SQSI detector once per encoder/scaler combination.
    print("SQSI")
    for encoder in encoders:
        for scaler in scalers:
            reference_seq = _reference_seq_for_encoder(path, encoder, reference_drifted_batches_seq)
            sqsi_drifted_batches = sqsi_all_batches(training_data, testing_features, size_batch, encoder, scaler)
            _report_real_world_run(path, sqsi_drifted_batches, reference_seq, reference_drifted_batches_time, number_testing_batches)

    # Run the label-dependent detectors. A fresh detector is created for each
    # run so that state accumulated in one run cannot leak into the next.
    print("label-dependent detectors")
    detector_classes = [drift.EDDM, drift.DDM, drift.ADWIN, drift.HDDM_A, drift.HDDM_W]
    for encoder in encoders:
        for scaler in scalers:
            reference_seq = _reference_seq_for_encoder(path, encoder, reference_drifted_batches_seq)
            for detector_class in detector_classes:
                detector = detector_class()
                print(detector.__class__.__name__)
                label_dependent_batches = label_dependent_check_drift(training_data, testing_features, labels, size_batch, encoder, scaler, detector)
                _report_real_world_run(path, label_dependent_batches, reference_seq, reference_drifted_batches_time, number_testing_batches)

    end = time.time()
    print("duration of test: " + str(int((end - start) / 60)) + ' minutes')
    print()
    print()


### Run tests on real-world data!

In [None]:
# Electricity data set: 45312 rows, the first 15104 for training, testing batches of 365 rows.
run_tests_real_world('real-world/electricity_dataset.csv', 'label', 45312, 15104, 365)

In [None]:
# Weather data set: 18159 rows, the first 6053 for training; evaluated with
# testing batches of 365 rows and of 30 rows.
run_tests_real_world('real-world/weather_dataset.csv', 'Label_Rain', 18159, 6053, 365)
run_tests_real_world('real-world/weather_dataset.csv', 'Label_Rain', 18159, 6053, 30)

In [None]:
# Spam data set: 4405 rows, the first 1468 for training; evaluated with
# testing batches of 100, 20 and 50 rows.
run_tests_real_world('real-world/spam_dataset.csv', 'ACTUAL_LABEL', 4405, 1468, 100)
run_tests_real_world('real-world/spam_dataset.csv', 'ACTUAL_LABEL', 4405, 1468, 20)
run_tests_real_world('real-world/spam_dataset.csv', 'ACTUAL_LABEL', 4405, 1468, 50)

In [None]:
# Airline data set: 539383 rows, the first 179794 for training, testing batches of 17000 rows.
run_tests_real_world('real-world/airline_dataset.csv','Delay', 539383, 179794, 17000)