In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from skmultiflow.data import DataStream
from skmultiflow.evaluation import EvaluatePrequential

In [2]:
from skmultiflow.data import WaveformGenerator, HyperplaneGenerator

# Create folders if they don't exist
os.makedirs('./artificial', exist_ok=True)
os.makedirs('./real-world', exist_ok=True)

# === Generate synthetic datasets if not already saved ===
def generate_synthetic_dataset(generator, num_samples, filename):
    """Draw samples from a stream generator and persist them as a CSV file.

    Nothing is generated when ``filename`` already exists; a message is
    printed instead.

    Parameters
    ----------
    generator : object
        Any object whose ``next_sample()`` returns a ``(X, y)`` pair where
        ``X[0]`` is the feature row and ``y[0]`` is the label of one sample.
    num_samples : int
        Number of samples to draw (one ``next_sample()`` call per sample).
    filename : str
        Destination CSV path. Feature columns are written first, followed by
        a ``label`` column; the index is not written.
    """
    # Guard clause: an existing file is treated as a valid cache
    if os.path.exists(filename):
        print(f"{filename} already exists, skipping generation.")
        return

    # One generator call per sample, in order, so the generator state
    # advances exactly num_samples times
    draws = [generator.next_sample() for _ in range(num_samples)]

    frame = pd.DataFrame([features[0] for features, _ in draws])
    frame['label'] = [target[0] for _, target in draws]
    frame.to_csv(filename, index=False)
    print(f"Saved {filename}")

# Build both seeded generators up front.
# HyperplaneGenerator's default constructor already produces 2 classes.
hyperplane_gen = HyperplaneGenerator(n_features=10, random_state=2002)
waveform_gen = WaveformGenerator(random_state=2002)

# Materialise each stream to CSV: Hyperplane first, then Waveform
for synthetic_gen, csv_target in (
    (hyperplane_gen, './artificial/HyperplaneDataset.csv'),
    (waveform_gen, './artificial/WaveformDataset.csv'),
):
    generate_synthetic_dataset(synthetic_gen, num_samples=100000, filename=csv_target)

./artificial/HyperplaneDataset.csv already exists, skipping generation.
./artificial/WaveformDataset.csv already exists, skipping generation.


In [3]:
rootDir = './'


def _load_csv_folder(folder):
    """Read every ``.csv`` file directly inside ``folder``.

    Returns a dict mapping each file's name without extension to the
    DataFrame loaded from it. Entries are visited in sorted order because
    ``os.listdir`` makes no ordering guarantee, and the dict order drives the
    order in which datasets are evaluated later on.
    """
    return {
        os.path.splitext(entry)[0]: pd.read_csv(os.path.join(folder, entry))
        for entry in sorted(os.listdir(folder))
        if entry.endswith('.csv')
    }


# Dataset name -> DataFrame, one dict per dataset family
artificialDatasets = _load_csv_folder(os.path.join(rootDir, 'artificial'))
realWorldDatasets = _load_csv_folder(os.path.join(rootDir, 'real-world'))


In [4]:
# Display some results
print(artificialDatasets.keys())
print(realWorldDatasets.keys())

dict_keys(['HyperplaneDataset', 'WaveformDataset'])
dict_keys(['rialto', 'spam'])


## Models

In [5]:
import skmultiflow as skm
from sklearn.neural_network import MLPClassifier

# Template instances, one per classifier under comparison. The skmultiflow
# models are built with their default constructor arguments; only the MLP is
# explicitly configured.
# NOTE: evaluate_model_on_datasets does not train these objects directly; it
# builds a new instance of each template's class for every dataset.
model_awe = skm.meta.AccuracyWeightedEnsembleClassifier()
model_arf = skm.meta.AdaptiveRandomForestClassifier()
model_dwm = skm.meta.DynamicWeightedMajorityClassifier()
model_samKNN = skm.lazy.SAMKNNClassifier()
model_lb = skm.meta.LeveragingBaggingClassifier()
# sklearn MLP with hidden_layer_sizes=(16, 16) and a fixed random_state
model_mlp = MLPClassifier(hidden_layer_sizes=(16, 16), random_state=2002)


Construct instances of the classification models. For each dataset, use the
Interleaved Test-Then-Train approach to train and evaluate the performance
of these classifiers, using prediction accuracy as the evaluation metric.
Report the following results for the classification models on each dataset:

- Overall accuracy: The overall prediction accuracy of the models.
- Prequential accuracy plot: Prequential accuracy is defined as the
  prediction accuracy of a model over the w most recent data instances. Use
  20 evaluation windows of size (dataset size/20) to calculate prequential
  accuracy values. Plot the obtained accuracy values over time for
  each dataset.

Discuss the performance of the classification models. Evaluate how the MLP
model performs compared to the ensemble models. Analyze the implications
of the observed fluctuations in accuracy values over time in the prequential
accuracy plots.

In [6]:
def evaluate_model_on_datasets(datasets_dict, datasets_name, model, model_name):
    """Run a prequential (test-then-train) evaluation of one model on each dataset.

    For every dataset a new model instance is created, evaluated with
    ``EvaluatePrequential`` using 20 evaluation windows, logged to
    ``results_<datasets_name>_<dataset_name>_<model_name>.csv`` and plotted
    via ``plot_prequential_accuracy_from_csv``.

    Parameters
    ----------
    datasets_dict : dict
        Maps dataset name to a DataFrame whose last column is the label and
        whose remaining columns are the features.
    datasets_name : str
        Label of the dataset family (used in messages and file names).
    model : estimator
        Template object; only its class (and, for the MLP, its parameters)
        is used. The template itself is never trained.
    model_name : str
        Display name of the model. The value ``"MLP"`` selects the
        scikit-learn code path.

    Returns
    -------
    dict
        Maps dataset name to a ``(evaluator, model_name)`` tuple.
    """
    results = {}

    for dataset_name, data in datasets_dict.items():
        print(f"\nEvaluating on {datasets_name} dataset: {dataset_name}")

        # Prepare data stream: last column is the target, the rest are features
        X = data.iloc[:, :-1].values
        y = data.iloc[:, -1].values
        stream = DataStream(X, y)

        if model_name == "MLP":
            # Fresh MLP with the template's hyperparameters.
            model_instance = model.__class__(**model.get_params())
            # Warm-up on the first sample with the full class list.
            # NOTE(review): presumably needed because the evaluator predicts
            # before its first training step and an unfitted sklearn MLP
            # cannot predict - confirm. The same sample is still part of the
            # stream, so it is seen once before being tested.
            model_instance.partial_fit(X[:1], y[:1], classes=np.unique(y))
        else:
            # NOTE: built with default arguments, so hyperparameters set on
            # the template are not carried over.
            model_instance = model.__class__()

        n_samples = len(y)
        # 20 evaluation windows; never let the window collapse to 0 on
        # datasets with fewer than 20 rows.
        window_size = max(1, n_samples // 20)
        # Single source of truth for the log path (written by the evaluator,
        # read back by the plotting helper).
        output_file = f'results_{datasets_name}_{dataset_name}_{model_name}.csv'

        # Setup evaluator
        evaluator = EvaluatePrequential(
            show_plot=False,
            pretrain_size=0,
            max_samples=n_samples,
            n_wait=window_size,
            metrics=['accuracy'],
            output_file=output_file
        )

        # Evaluate model
        evaluator.evaluate(stream=stream, model=[model_instance], model_names=[model_name])

        # Store results
        results[dataset_name] = (evaluator, model_name)

        # Plot from the CSV log that the evaluator just wrote
        plot_prequential_accuracy_from_csv(output_file, model_name, dataset_name, datasets_name)

    return results


def plot_prequential_accuracy_from_csv(csv_file, model_name, dataset_name, datasets_name):
    """Plot one model's prequential accuracy over time from an evaluator CSV log.

    The figure is saved to
    ``prequential_accuracy_<datasets_name>_<dataset_name>_<model_name>.png``
    and then closed; nothing is returned.

    Parameters
    ----------
    csv_file : str
        Path of the CSV log; lines starting with ``#`` are ignored.
    model_name : str
        Model name as it appears inside the accuracy column names.
    dataset_name : str
        Dataset name (used in the title and the output file name).
    datasets_name : str
        Dataset family label (used in the title and the output file name).
    """
    df = pd.read_csv(csv_file, comment='#')

    # Prequential accuracy is the accuracy over the most recent window, which
    # the evaluator logs as ``current_acc``; ``mean_acc`` is the running
    # average since the start and smooths out exactly the fluctuations this
    # plot is meant to show. Fall back to it only if no windowed column exists.
    windowed_column = f'current_acc_[{model_name}]'
    cumulative_column = 'mean_acc_[' + model_name + ']'
    accuracy_column = windowed_column if windowed_column in df.columns else cumulative_column

    accuracy_values = df[accuracy_column]
    sample_counts = df['id']

    # Plot the accuracy over time; the explicit figure handle guarantees the
    # figure is released even if plotting or saving fails.
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.plot(sample_counts, accuracy_values, label=model_name)
        ax.set_title(f'Prequential Accuracy on {dataset_name} ({datasets_name})')
        ax.set_xlabel('Number of samples processed')
        ax.set_ylabel('Accuracy')
        ax.legend()
        ax.grid(True)
        fig.savefig(f'prequential_accuracy_{datasets_name}_{dataset_name}_{model_name}.png')
    finally:
        plt.close(fig)



In [None]:
# Evaluate and report AWE on real datasets
results_awe_real = evaluate_model_on_datasets(realWorldDatasets, "real-world", model_awe, "AWE")

In [22]:
# Evaluate and report AWE on artificial datasets
results_awe_artificial = evaluate_model_on_datasets(artificialDatasets, "artificial", model_awe, "AWE")


Evaluating on artificial dataset: HyperplaneDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [303.98s]
Processed samples: 100000
Mean performance:
AWE - Accuracy     : 0.9199

Evaluating on artificial dataset: WaveformDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [813.20s]
Processed samples: 100000
Mean performance:
AWE - Accuracy     : 0.8190


In [140]:
# Evaluate and report SAMKNN on real datasets
results_samknn_real = evaluate_model_on_datasets(realWorldDatasets, "real-world", model_samKNN, "SAMKNN")


Evaluating on real-world dataset: rialto
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [107.12s]
Processed samples: 82250
Mean performance:
SAMKNN - Accuracy     : 0.8136

Evaluating on real-world dataset: spam
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [43.76s]
Processed samples: 6213
Mean performance:
SAMKNN - Accuracy     : 0.9619


In [141]:
# Evaluate and report SAMKNN on artificial datasets
results_samknn_artificial = evaluate_model_on_datasets(artificialDatasets, "artificial", model_samKNN, "SAMKNN")


Evaluating on artificial dataset: HyperplaneDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [147.18s]
Processed samples: 100000
Mean performance:
SAMKNN - Accuracy     : 0.8782

Evaluating on artificial dataset: WaveformDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [95.93s]
Processed samples: 100000
Mean performance:
SAMKNN - Accuracy     : 0.8449


In [142]:
# Evaluate and report DWM on real datasets
results_dwm_real = evaluate_model_on_datasets(realWorldDatasets, "real-world", model_dwm, "DWM")


Evaluating on real-world dataset: rialto
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [494.23s]
Processed samples: 82250
Mean performance:
DWM - Accuracy     : 0.3295

Evaluating on real-world dataset: spam
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [432.24s]
Processed samples: 6213
Mean performance:
DWM - Accuracy     : 0.8835


In [143]:
# Evaluate and report DWM on artificial datasets
results_dwm_artificial = evaluate_model_on_datasets(artificialDatasets, "artificial", model_dwm, "DWM")


Evaluating on artificial dataset: HyperplaneDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [104.68s]
Processed samples: 100000
Mean performance:
DWM - Accuracy     : 0.9306

Evaluating on artificial dataset: WaveformDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [235.97s]
Processed samples: 100000
Mean performance:
DWM - Accuracy     : 0.7965


In [144]:
# Evaluate and report ARF on real datasets
results_arf_real = evaluate_model_on_datasets(realWorldDatasets, "real-world", model_arf, "ARF")


Evaluating on real-world dataset: rialto
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [1356.76s]
Processed samples: 82250
Mean performance:
ARF - Accuracy     : 0.7922

Evaluating on real-world dataset: spam
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [167.90s]
Processed samples: 6213
Mean performance:
ARF - Accuracy     : 0.9501


In [145]:
# Evaluate and report ARF on artificial datasets
results_arf_artificial = evaluate_model_on_datasets(artificialDatasets, "artificial", model_arf, "ARF")


Evaluating on artificial dataset: HyperplaneDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 ################---- [80%] [683.34s]

  self.mdbl_width += self.width


 #################### [100%] [877.58s]
Processed samples: 100000
Mean performance:
ARF - Accuracy     : 0.8711

Evaluating on artificial dataset: WaveformDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [1338.46s]
Processed samples: 100000
Mean performance:
ARF - Accuracy     : 0.8337


In [146]:
# Evaluate and report LB on real datasets
results_lb_real = evaluate_model_on_datasets(realWorldDatasets, "real-world", model_lb, "LB")


Evaluating on real-world dataset: rialto
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [3255.30s]
Processed samples: 82250
Mean performance:
LB - Accuracy     : 0.8463

Evaluating on real-world dataset: spam
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [4054.24s]
Processed samples: 6213
Mean performance:
LB - Accuracy     : 0.9390


In [147]:
# Evaluate and report LB on artificial datasets
results_lb_artificial = evaluate_model_on_datasets(artificialDatasets, "artificial", model_lb, "LB")


Evaluating on artificial dataset: HyperplaneDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [2189.38s]
Processed samples: 100000
Mean performance:
LB - Accuracy     : 0.7277

Evaluating on artificial dataset: WaveformDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [3602.60s]
Processed samples: 100000
Mean performance:
LB - Accuracy     : 0.7528


In [12]:
# Evaluate and report MLP on real datasets
results_mlp_real = evaluate_model_on_datasets(realWorldDatasets, "real-world", model_mlp, "MLP")


Evaluating on real-world dataset: rialto
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [95.62s]
Processed samples: 82250
Mean performance:
MLP - Accuracy     : 0.4977

Evaluating on real-world dataset: spam
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [8.55s]
Processed samples: 6213
Mean performance:
MLP - Accuracy     : 0.9614


In [13]:
# Evaluate and report MLP on artificial datasets
results_mlp_artificial = evaluate_model_on_datasets(artificialDatasets, "artificial", model_mlp, "MLP")


Evaluating on artificial dataset: HyperplaneDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [114.57s]
Processed samples: 100000
Mean performance:
MLP - Accuracy     : 0.9264

Evaluating on artificial dataset: WaveformDataset
Prequential Evaluation
Evaluating 1 target(s).
Evaluating...
 #################### [100%] [113.80s]
Processed samples: 100000
Mean performance:
MLP - Accuracy     : 0.8449


In [17]:
# Generate summary reports from the .csv files
def generate_summary_report(datasets_dict, datasets_name, model_name):
    """Collect the overall accuracy of one model on every dataset of a family.

    For each dataset name in ``datasets_dict`` the log
    ``results_<datasets_name>_<dataset_name>_<model_name>.csv`` is read
    (``#`` lines skipped) and the last value of the model's ``mean_acc``
    column is taken as the overall accuracy.

    Returns a dict mapping dataset name to that accuracy value.
    """
    accuracy_column = f'mean_acc_[{model_name}]'
    report = {}

    # Only the dataset names matter here; the DataFrames are not used
    for name, _ in datasets_dict.items():
        log = pd.read_csv(f'results_{datasets_name}_{name}_{model_name}.csv', comment='#')
        # Final row of the running mean = accuracy over the whole stream
        report[name] = log[accuracy_column].iloc[-1]

    return report

# Overall accuracy per model, read back from the evaluator CSV logs
summary_awe_real = generate_summary_report(realWorldDatasets, "real-world", "AWE")
summary_awe_artificial = generate_summary_report(artificialDatasets, "artificial", "AWE")

summary_samknn_real = generate_summary_report(realWorldDatasets, "real-world", "SAMKNN")
summary_samknn_artificial = generate_summary_report(artificialDatasets, "artificial", "SAMKNN")

summary_dwm_real = generate_summary_report(realWorldDatasets, "real-world", "DWM")
summary_dwm_artificial = generate_summary_report(artificialDatasets, "artificial", "DWM")

summary_arf_real = generate_summary_report(realWorldDatasets, "real-world", "ARF")
summary_arf_artificial = generate_summary_report(artificialDatasets, "artificial", "ARF")

summary_lb_real = generate_summary_report(realWorldDatasets, "real-world", "LB")
summary_lb_artificial = generate_summary_report(artificialDatasets, "artificial", "LB")

summary_mlp_real = generate_summary_report(realWorldDatasets, "real-world", "MLP")
summary_mlp_artificial = generate_summary_report(artificialDatasets, "artificial", "MLP")

# Print them all, in the same order as they were computed
print("=== Summary Reports ===")
for report_heading, report_values in (
    ("AWE Real World Datasets:", summary_awe_real),
    ("AWE Artificial Datasets:", summary_awe_artificial),
    ("SAMKNN Real World Datasets:", summary_samknn_real),
    ("SAMKNN Artificial Datasets:", summary_samknn_artificial),
    ("DWM Real World Datasets:", summary_dwm_real),
    ("DWM Artificial Datasets:", summary_dwm_artificial),
    ("ARF Real World Datasets:", summary_arf_real),
    ("ARF Artificial Datasets:", summary_arf_artificial),
    ("LB Real World Datasets:", summary_lb_real),
    ("LB Artificial Datasets:", summary_lb_artificial),
    ("MLP Real World Datasets:", summary_mlp_real),
    ("MLP Artificial Datasets:", summary_mlp_artificial),
):
    print(report_heading, report_values)

=== Summary Reports ===
AWE Real World Datasets: {'rialto': 0.41155, 'spam': 0.723966}
AWE Artificial Datasets: {'HyperplaneDataset': 0.884, 'WaveformDataset': 0.7906}
SAMKNN Real World Datasets: {'rialto': 0.813629, 'spam': 0.9618540000000001}
SAMKNN Artificial Datasets: {'HyperplaneDataset': 0.8782, 'WaveformDataset': 0.8448899999999999}
DWM Real World Datasets: {'rialto': 0.329495, 'spam': 0.8834700000000001}
DWM Artificial Datasets: {'HyperplaneDataset': 0.93056, 'WaveformDataset': 0.79655}
ARF Real World Datasets: {'rialto': 0.7921699999999999, 'spam': 0.950105}
ARF Artificial Datasets: {'HyperplaneDataset': 0.8710700000000001, 'WaveformDataset': 0.83367}
LB Real World Datasets: {'rialto': 0.8462860000000001, 'spam': 0.938999}
LB Artificial Datasets: {'HyperplaneDataset': 0.7276600000000001, 'WaveformDataset': 0.7528199999999999}
MLP Real World Datasets: {'rialto': 0.49773900000000004, 'spam': 0.9613709999999999}
MLP Artificial Datasets: {'HyperplaneDataset': 0.9264100000000001, '

In [7]:
# Generate a plot that plots the different models together on the same plot, for a single dataset, to compare
def plot_comparison(datasets_dict, datasets_name, model_names, dataset_name):
    """Plot the prequential accuracy of several models on one dataset in a single figure.

    Each model's curve is read from
    ``results_<datasets_name>_<dataset_name>_<model_name>.csv`` (``#`` lines
    skipped). The figure is saved to
    ``comparison_<datasets_name>_<dataset_name>.png`` and then closed.

    Parameters
    ----------
    datasets_dict : dict
        Not used by this function; kept so existing calls keep working.
    datasets_name : str
        Dataset family label (used in file names and the title).
    model_names : iterable of str
        Names of the models to overlay, as used in the CSV file names and
        accuracy column names.
    dataset_name : str
        Name of the dataset to plot.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        for model_name in model_names:
            csv_file = f'results_{datasets_name}_{dataset_name}_{model_name}.csv'
            df = pd.read_csv(csv_file, comment='#')

            # Prequential accuracy is the accuracy over the most recent
            # window (logged as ``current_acc``); ``mean_acc`` is the running
            # average since the start. Fall back to it only if the windowed
            # column is missing from the log.
            windowed_column = f'current_acc_[{model_name}]'
            cumulative_column = 'mean_acc_[' + model_name + ']'
            accuracy_column = windowed_column if windowed_column in df.columns else cumulative_column

            # Plot the accuracy over time
            ax.plot(df['id'], df[accuracy_column], label=model_name)

        ax.set_title(f'Prequential Accuracy Comparison on {dataset_name} ({datasets_name})')
        ax.set_xlabel('Number of samples processed')
        ax.set_ylabel('Accuracy')
        ax.legend()
        ax.grid(True)
        fig.savefig(f'comparison_{datasets_name}_{dataset_name}.png')
    finally:
        # Release the figure even when a CSV is missing or saving fails
        plt.close(fig)


# Draw one comparison figure per dataset (all 4), overlaying all six models
for dataset_group, group_label, target_dataset in (
    (artificialDatasets, "artificial", "WaveformDataset"),
    (artificialDatasets, "artificial", "HyperplaneDataset"),
    (realWorldDatasets, "real-world", "rialto"),
    (realWorldDatasets, "real-world", "spam"),
):
    plot_comparison(dataset_group, group_label, ["AWE", "SAMKNN", "DWM", "ARF", "LB", "MLP"], target_dataset)
