In [1]:
import numpy as np
import sklearn
%matplotlib inline
import matplotlib
import pandas as pd
import matplotlib.pyplot as plt

import onlinecp.algos as algos
import onlinecp.utils.feature_functions as feat
import onlinecp.utils.gendata as gd
import onlinecp.utils.evaluation as ev
from onlinecp.utils.datasets import pw_linear, pw_constant
from experiments import Experiment

In [2]:
# GMM dataset. Note: nb_change=1 means the stream contains no change points;
# consecutive change points are separated by n samples.
# The recorded output below shows 25000 samples (10 * 2500), 15 features and 9 change points.
# NOTE(review): no random seed is set anywhere in this notebook; if stream_GMM
# draws random data (presumably it does — confirm), re-runs will not reproduce the outputs.
X2, truth = gd.stream_GMM(d=15, nb_change=10, n=2500)

print(f'GMM Dataset has {X2.shape[0]} data points and {X2.shape[1]} features')
print(f'There are {truth.sum()} change points')

GMM Dataset has 25000 data points and 15 features
There are 9.0 change points


In [3]:
# Build an experiment on the GMM stream and its ground truth, using the 'newmaRFF' algorithm.
# NOTE(review): the meaning of the third positional argument (10) is not visible
# from this notebook — confirm against Experiment's signature.
exp = Experiment(X2, truth, 10, 'newmaRFF')

In [4]:
# Run the algorithm configured on `exp`; the recorded output shows a "Start algo" banner.
exp.run_algo()

Start algo  newmaRFF ...


In [5]:
# Evaluate the run. The recorded output reports per-point metric dicts for 30
# evaluation points and ends with arrays keyed 'EDD', 'FA' and 'ND'.
exp.get_results()

Omitting first 3 distributions
Calculating performance with 30 points
Evaluate performance 1 / 30
{'num_changes': 9, 'EDD': 164.11111111111111, '%_missed': 0.0, 'total_missed': 0, 'total_false_alarms': 1, 'false_alarm_rate': 0.1111111111111111, 'ttfas': [350], 'avg_ttfa': 350.0, 'cp': array([False, False, False, ..., False, False, False])}
Evaluate performance 2 / 30
{'num_changes': 9, 'EDD': 164.66666666666666, '%_missed': 0.0, 'total_missed': 0, 'total_false_alarms': 0, 'false_alarm_rate': 0.0, 'ttfas': [], 'avg_ttfa': 0.0, 'cp': array([False, False, False, ..., False, False, False])}
Evaluate performance 3 / 30
{'num_changes': 9, 'EDD': 165.55555555555554, '%_missed': 0.0, 'total_missed': 0, 'total_false_alarms': 0, 'false_alarm_rate': 0.0, 'ttfas': [], 'avg_ttfa': 0.0, 'cp': array([False, False, False, ..., False, False, False])}
Evaluate performance 4 / 30
{'num_changes': 9, 'EDD': 165.55555555555554, '%_missed': 0.0, 'total_missed': 0, 'total_false_alarms': 0, 'false_alarm_rate':

{'num_changes': 9, 'EDD': 86.75, '%_missed': 11.11111111111111, 'total_missed': 1, 'total_false_alarms': 58, 'false_alarm_rate': 6.444444444444445, 'ttfas': [329, 705, 465, 404, 360, 658, 1125, 1033], 'avg_ttfa': 634.875, 'cp': array([False, False, False, ..., False, False, False])}


{'EDD': array([164.11111111, 164.66666667, 165.55555556, 165.55555556,
        167.44444444, 187.22222222, 174.375     , 174.5       ,
        175.75      , 176.        , 176.375     , 176.5       ,
        176.875     , 177.125     , 177.5       , 186.5       ,
        188.125     , 190.        , 191.25      , 193.        ,
        194.28571429, 201.71428571, 202.28571429, 207.        ,
        207.57142857, 215.14285714, 215.57142857, 235.14285714,
        235.71428571, 236.        ]),
 'FA': array([0.11111111, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ]),
 'ND': array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0. 

In [None]:
# Inspect the `statistics` attribute stored on the experiment object.
exp.statistics

In [None]:
# One flag per sample, all initially unflagged (0). The length is taken from
# `truth` rather than hard-coded, because the evaluation cell requires
# flagged.shape[0] == truth.shape[0].
flagged = np.zeros(truth.shape[0])

In [None]:
# NOTE(review): in the visible notebook `where_change` is only assigned in the
# evaluation cell further down, so this cell depends on that cell having been run first.
where_change

In [None]:
# Manually flag sample 2502. This mutates `flagged` in place; re-create the
# array to reset the hand-made flags.
flagged[2502]=1

In [None]:
# Manually flag samples 4998 and 4996 (in-place mutation of `flagged`).
flagged[4998]=1
flagged[4996]=1

In [None]:
# Manually flag sample 8998 (in-place mutation of `flagged`).
flagged[8998]=1

In [None]:
# Manually flag sample 6000 (in-place mutation of `flagged`).
flagged[6000]=1

In [None]:
# Shape of the ground-truth array; the evaluation cell compares its first
# dimension with flagged.shape[0].
truth.shape

In [None]:
# Sum of the flag array, i.e. how many samples have been set to 1 by the cells above.
flagged.sum()

In [None]:
# Count the positions where `cp` differs from `flagged`.
# NOTE(review): `cp` is only assigned in the evaluation cell further down in the
# visible notebook, so this cell relies on that cell having been run first.
(cp!=flagged).sum()

In [None]:
# Spot-check a single entry of `cp` (assigned in the evaluation cell further down).
cp[1999]

In [None]:
# Flat re-implementation of the detection evaluation, kept at cell level so the
# intermediate values (cp, where_change, FA, ...) stay inspectable afterwards.
# Reads two arrays from the kernel:
#   truth   - non-zero entries mark the true change points
#   flagged - non-zero entries mark samples flagged as "change detected"
# The last expression is the summary dict, displayed as the cell output.
n = truth.shape[0]
if n != flagged.shape[0]:
    print('error', n, flagged.shape[0])

# Collapse runs of flags into single detections: cp[i] is True when sample i is
# unflagged and sample i + 1 is flagged, so consecutive flags count only once.
is_flagged = np.asarray(flagged).astype(bool)
cp = np.zeros(n, dtype=bool)
last = max(n - 1, 0)
cp[:last] = ~is_flagged[:last] & is_flagged[1:last + 1]

EDD, not_detected, FA = 0, 0, 0
num_change = int(truth.sum())
# Change-point indices, with n appended as a sentinel that closes the last segment.
where_change = np.concatenate((np.argwhere(truth).flatten(), np.array([n])))

total_delays, total_undetected = 0, 0
ttfas = []  # per segment: offset of the first late alarm from the segment midpoint

for i in range(num_change):
    begin_ind = where_change[i]
    end_ind = where_change[i + 1]
    middle_ind = int((begin_ind + end_ind) / 2)

    # Detection: first cp at or after the change, scanned up to the midpoint.
    j = begin_ind
    while j <= middle_ind and not cp[j]:
        j = j + 1
    # The scan can stop at middle_ind + 1, which equals n when the change sits
    # in the last two samples; bound-check before reading cp[j].
    if j < n and cp[j]:
        total_delays += j - begin_ind
    else:
        total_undetected += 1
        not_detected += 1

    # False alarms: every cp in the second half of the segment.
    # NOTE(review): a cp at middle_ind (or middle_ind + 1) can be counted both as
    # the detection above and as a false alarm here — confirm this is intended.
    late_alarms = np.flatnonzero(cp[middle_ind:end_ind])
    if late_alarms.size > 0:
        ttfas.append(int(late_alarms[0]))
    FA += int(late_alarms.size)

# Ratios fall back to 0.0 when their denominator is empty (no change points /
# no false alarms) instead of raising ZeroDivisionError.
{'num_changes': num_change,
 'EDD': total_delays / np.max((num_change - not_detected, 1)),
 '%_missed': 100 * not_detected / num_change if num_change else 0.0,
 'total_missed': total_undetected,
 'total_false_alarms': FA,
 'false_alarm_rate': FA / num_change if num_change else 0.0,
 'ttfas': ttfas,
 'avg_ttfa': sum(ttfas) / len(ttfas) if ttfas else 0.0,
 'cp': cp}

In [None]:
# Shape of the detection array built by the evaluation cell (allocated with length n).
cp.shape

In [None]:
# Number of detections in samples 3750..4999.
# NOTE(review): presumably the false-alarm half of the segment 2500..5000 — confirm against where_change.
cp[3750:5000].sum()

In [None]:
# Offset (relative to 3750) of the first detection inside that window.
# Raises IndexError if the window contains no detection.
np.where(cp[3750:5000])[0][0]

In [None]:
# Indices of all detections in `cp`.
np.where(cp)

In [None]:
# Scratch arithmetic.
# NOTE(review): possibly the detected fraction (8 of 9 change points) — confirm or delete.
8/9

In [None]:
# Show the evaluation counters. Note that the evaluation cell only initialises
# EDD to 0 and never updates it; the accumulated delay lives in total_delays.
EDD,FA,not_detected

In [None]:
# Change-point indices from the evaluation cell, with n appended as the final sentinel.
where_change

In [None]:
# NOTE(review): `detection_stat` is never assigned in the visible notebook; this
# cell depends on leftover kernel state.
detection_stat.shape

In [None]:
# NOTE(review): `online_th` is never assigned in the visible notebook; this cell
# depends on leftover kernel state.
online_th.shape

In [None]:
# Compute performance curves twice: once with start/end coefficients 1.05 and 1.2,
# and once passing `online_th` as thres_values with both coefficients set to 1.
# NOTE(review): `ground_truth`, `detection_stat` and `online_th` are not assigned
# anywhere in the visible notebook — this cell relies on leftover kernel state.
# NOTE: rebinds EDD and FA, overwriting the counters from the evaluation cell.
EDD, FA, ND = ev.compute_curves(ground_truth, detection_stat, num_points=30, start_coeff=1.05, end_coeff=1.2)
EDDth, FAth, NDth = ev.compute_curves(ground_truth, detection_stat, num_points=30,
                                  thres_values=online_th, start_coeff=1, end_coeff=1)

In [None]:
# Plot the time series produced by the current experiment (`exp`).
exp.plot_stat_time_series()

In [None]:
# Linear dataset
# pw_linear returns three values; the second one is discarded here.
# NOTE: rebinds `truth`, replacing the GMM ground truth loaded earlier — cells
# above that read `truth` will see this array if re-run afterwards.
XL, _, truth = pw_linear(n_samples=20000, n_features=15, n_bkps=10, noise_std=0.1)

print(f'Linear Dataset has {XL.shape[0]} data points and {XL.shape[1]} features')
print(f'There are {truth.sum()} change points')

In [None]:
# Wavy dataset
# NOTE(review): labelled "Wavy" but generated with the same pw_linear call as the
# linear dataset (here with n_bkps=1 and no n_features); pw_constant is imported
# but unused in the visible notebook — confirm which generator is intended.
# NOTE: rebinds `truth` again.
XW, _, truth = pw_linear(n_samples=20000, n_bkps=1, noise_std=0.1)

print(f'Wavy Dataset has {XW.shape[0]} data points and {XW.shape[1]} features')
print(f'There are {truth.sum()} change points')

In [None]:
# Build an experiment on the wavy signal with the 'ScanB' algorithm.
# NOTE(review): the earlier call passes four positional arguments
# (data, truth, 10, algorithm); here only two are given, so 'ScanB' lands in the
# second position — confirm against Experiment's current signature.
# NOTE: rebinds `exp`, replacing the GMM experiment.
exp = Experiment(XW, 'ScanB')

In [None]:
# Run the algorithm configured on the current `exp`.
exp.run_algo()

In [None]:
# Plot the time series produced by the current experiment (`exp`).
exp.plot_stat_time_series()

In [None]:
# NOTE(review): presumably sets a target time-to-false-alarm of 1000 samples on
# the experiment — confirm against Experiment.set_ttfa.
exp.set_ttfa(1000)

In [None]:
# Inspect the `statistics` attribute of the current experiment.
exp.statistics

In [None]:
# Display the full wavy signal array (20000 rows requested above); consider a slice for a shorter output.
XW

In [None]:
# Second column (index 1) of the wavy signal.
XW[:,1]

In [None]:
import plotly.graph_objects as go

In [None]:
# Plot the first feature (column 0) of the wavy signal against the sample index.
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(XW.shape[0]), y=XW[:, 0],
                         mode='lines',
                         name='Signal'))
# The trace is the raw signal, so the y-axis is labelled as such.
fig.update_layout(title='Signal over time',
                  xaxis_title="Time",
                  yaxis_title="Signal value",
                  )
fig.show()