In [1]:
import numpy as np
import sklearn 

import onlinecp.algos as algos
import onlinecp.utils.feature_functions as feat
import onlinecp.utils.gendata as gd
import onlinecp.utils.evaluation as ev
from onlinecp.utils.datasets import pw_linear, pw_constant

In [2]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Call the GMM stream generator without storing the result, so the cell only
# echoes the return value. Other cells in this notebook unpack this call into
# two values.
gd.stream_GMM(d=10, k=3, n=5000, nb_change=0)

In [None]:
# Generate data with pw_linear and keep only the first of its three return values.
Y, _, _ = pw_linear(500, 4)

In [None]:
# Show the shape of the array produced by pw_linear.
Y.shape

In [None]:
# Settings for the piecewise-constant stream used by the NEWMA run below.
d=4  # second argument to pw_constant; also passed to feat.generate_frequencies later
n=10000  # first argument to pw_constant — presumably the number of samples; TODO confirm
# The third positional argument (4) is presumably the number of change points —
# confirm against onlinecp.utils.datasets. Three values are unpacked: the data,
# `cps`, and `ground_truth` (compared against 1 in later cells).
X, cps, ground_truth = pw_constant(n, d, 4, noise_std=2)

In [None]:
# Smallest index along the first axis at which ground_truth equals 1.
np.nonzero(ground_truth == 1)[0].min()

In [None]:
# Keep only the first four columns of X. This rebinds X itself; the slice is
# idempotent, so re-running the cell leaves X unchanged.
X=X[:,:4]

In [None]:
# common config
choice_sigma = 'median'  # forwarded to feat.generate_frequencies as `choice_sigma`
numel = 100  # how many leading samples are used for the sigma estimate
data_sigma_estimate = X[:numel]  # data for median trick to estimate sigma
B = 250  # window size

# Newma and MA config
big_Lambda, small_lambda = algos.select_optimal_parameters(B)  # forget factors chosen with heuristic in the paper
# The smaller forget factor is reused as the `adapt_forget_factor` argument of NEWMA below.
thres_ff = small_lambda
# number of random features is set automatically with this criterion
m = int((1 / 4) / (small_lambda + big_Lambda) ** 2)
# Ten times as many features; not used anywhere else in the visible cells.
m_OPU = 10 * m
# `d` comes from an earlier cell. W is consumed by feat_func below;
# sigmasq is not used again in the visible cells.
W, sigmasq = feat.generate_frequencies(m, d, data=data_sigma_estimate, choice_sigma=choice_sigma)

algo = 'newmaRFF'  # only used for the progress message below
print('Start algo ', algo, '...')
print('# RF: ', m)

def feat_func(x):
    """Map `x` through feat.fourier_feat using the frequency matrix W from the enclosing cell."""
    fourier_features = feat.fourier_feat(x, W)
    return fourier_features

# Build the NEWMA detector from the first sample X[0], the two forget factors and
# the feature map, then run it over the whole stream. Later cells read the results
# from `detector.stat_stored` (element 0 and element 1 of each stored entry).
detector = algos.NEWMA(X[0], forget_factor=big_Lambda, forget_factor2=small_lambda, feat_func=feat_func,
                       adapt_forget_factor=thres_ff)
detector.apply_to_data(X)

In [None]:
# Extract the first element of every stored entry before displaying it, so that
# this cell does not depend on a later cell having been run first.
newma_stat = np.array([entry[0] for entry in detector.stat_stored])
newma_stat

In [None]:
# First element of every entry stored by the detector, collected into an array.
newma_stat = np.array([entry[0] for entry in detector.stat_stored])

In [None]:
# Second element of every entry stored by the detector, collected into an array.
online_th = np.array([entry[1] for entry in detector.stat_stored])

In [None]:
# Indices at which ground_truth equals 1, as a tuple of index arrays.
np.nonzero(ground_truth == 1)

In [None]:
# Plot the NEWMA statistic and the online threshold against the sample index.
# Both series share the same x values; the legend tells the two apart.
time_index = np.arange(newma_stat.shape[0])
plt.figure()
plt.scatter(time_index, newma_stat, label='NEWMA statistic')
plt.scatter(time_index, online_th, label='Online threshold')
plt.xlabel('Time')
plt.ylabel('Stat')
plt.legend()
plt.show()

In [None]:
# The statistic scored here is the NEWMA statistic extracted from the detector.
detection_stat = newma_stat
# Curves over a sweep of thresholds controlled by num_points / start_coeff / end_coeff.
EDD, FA, ND = ev.compute_curves(ground_truth, detection_stat, num_points=30, start_coeff=1.05, end_coeff=1.2)
# Same evaluation, but passing the detector's own online threshold as `thres_values`.
EDDth, FAth, NDth = ev.compute_curves(ground_truth, detection_stat, num_points=30,
                                      thres_values=online_th, start_coeff=1, end_coeff=1)

In [None]:
# Echo the full ground_truth array (the labels returned by pw_constant above).
ground_truth

In [None]:
# Run the script test_algo_on_data.py in a shell. 'newmaRFF' matches the algorithm
# name used above; 'tyler' is presumably the output name, since the next cell
# loads 'tyler.npz' — confirm against the script.
!  python test_algo_on_data.py 'newmaRFF' 'tyler'  -n 200 -nb 50 -d 1 -show 

In [None]:
# Load the 'tyler.npz' archive from the working directory. It is presumably written
# by the test_algo_on_data.py run in the previous cell — confirm.
t = np.load('tyler.npz')

In [None]:
# List the names of the arrays stored in the archive through the public NpzFile
# attribute instead of inspecting the object's private internals.
t.files

In [None]:
# compute_curves needs a ground truth and a detection statistic (see its earlier use
# in this notebook). Feed it the arrays saved in the archive, with the same sweep
# settings as the earlier call.
ev.compute_curves(t['ground_truth'], t['detection_stat'], num_points=30, start_coeff=1.05, end_coeff=1.2)

In [None]:
# Show the 'ground_truth' array stored in the archive.
t['ground_truth']

In [None]:
# Show the 'online_th' array stored in the archive.
t['online_th']

In [None]:
# Show the 'detection_stat' array stored in the archive.
t['detection_stat']

In [None]:
# Show the 'EDDth' entry stored in the archive.
t['EDDth']

In [None]:
# NOTE(review): `pw_normal` is not imported in any visible cell (only pw_linear and
# pw_constant are imported from onlinecp.utils.datasets). Confirm where it lives and
# import it, otherwise this cell raises NameError on a fresh kernel.
# Unlike the pw_linear / pw_constant calls, only two values are unpacked here.
Y, cps = pw_normal(1000, 2)

In [None]:
# Show the shape of the array returned by pw_normal.
Y.shape

In [None]:
# Last entry of cps, the second value returned by pw_normal.
cps[-1]

In [None]:
from sklearn.datasets import make_blobs

In [None]:
# X, y = make_blobs(n_samples=1000, centers=2, n_features=2, random_state=0)

In [None]:
# Scatter the first two columns of X against each other. The axes show raw data
# coordinates, so they are labelled with the columns they display.
plt.figure()
plt.scatter(X[:, 0], X[:, 1])
plt.xlabel('X[:, 0]')
plt.ylabel('X[:, 1]')
plt.show()

In [None]:
# Display cps converted to a NumPy array.
np.array(cps)

In [None]:
# Curve stored in the archive: 'ND' plotted against 'FA', drawn on explicit Axes.
fig, ax = plt.subplots()
ax.plot(t['FA'], t['ND'], '-o')
ax.set_xlabel('False Alarm')
ax.set_ylabel('Missed Detection')
plt.show()

In [None]:
parser.add_argument('-n', type=int, default=2000, help='number of samples for each distribution')
parser.add_argument('-nb', type=int, default=500, help='number of changes of distribution in the series')
parser.add_argument('-d', type=int, default=100, help='dimensionality of the samples in the time series')

In [None]:
# default data settings
# NOTE: `n` and `d` rebind the values set for the pw_constant experiment earlier
# in the notebook (n=10000, d=4); cells above that are re-run afterwards will see
# these new values.
n=200 # distance between cps, the first CP is at n, then 2n, 3n etc, last one is at n*(nb) - n
nb_change=2  # passed to gd.stream_GMM as `nb_change`
d=2  # passed to gd.stream_GMM as `d`

In [None]:
# parameters of gmm
k = 3  # passed to gd.stream_GMM as `k` — presumably the number of mixture components; confirm
std_mean = 10  # the bigger, the more change in means
wishart = 5  # the bigger, the less change in diagonal variances

# Generate the stream. This rebinds X and ground_truth, replacing the
# pw_constant data used by the NEWMA cells above.
X, ground_truth = gd.stream_GMM(d=d, n=n, nb_change=nb_change, std_mean=std_mean, 
                                concentration_wishart=wishart, k=k)

In [None]:
# Shape of the ground-truth array returned by gd.stream_GMM.
ground_truth.shape

In [None]:
import pandas as pd

In [None]:
# Plot the first column of Y against the sample index. The axes are labelled
# with what is actually drawn.
plt.figure()
plt.plot(np.arange(Y.shape[0]), pd.DataFrame(Y)[0].values, '-o')
plt.xlabel('Time')
plt.ylabel('Y[:, 0]')
plt.show()

In [None]:
# Plot the second column of X against the sample index. The axes are labelled
# with what is actually drawn.
plt.figure()
plt.plot(np.arange(X.shape[0]), pd.DataFrame(X)[1].values, '-o')
plt.xlabel('Time')
plt.ylabel('X[:, 1]')
plt.show()

In [None]:
# each row is a data point, each col is a feature/dimension
# (X here is the stream returned by gd.stream_GMM above)
X.shape

In [51]:
# Piecewise-constant data with 0 as the third positional argument (presumably no
# change points — confirm) and 0.5 as the fourth; only the first return value is kept.
X1, _, _ = pw_constant(5000, 10, 0, 0.5)

In [25]:
# GMM stream generated with nb_change=0. X2 and truth are not used by the remaining visible cells.
X2, truth = gd.stream_GMM(d=15, nb_change=0, n=1000)

In [26]:
from experiments import Experiment

In [52]:
# Set up an Experiment on X1 with the 'newmaRFF' algorithm.
# NOTE(review): the meaning of the list [5, 6, 7, 8] is not visible from this
# notebook — document it after checking the Experiment class.
exp = Experiment(X1, 'newmaRFF', [5,6,7,8])

In [53]:
# Run the configured algorithm; the recorded output shows it printing a
# 'Start algo  newmaRFF ...' progress line.
exp.run_algo()

Start algo  newmaRFF ...


In [54]:
# Presumably plots the detection statistic over time — confirm in the Experiment class.
exp.plot_stat_time_series()

In [55]:
# Presumably plots the distribution of the detection statistic — confirm in the Experiment class.
exp.plot_stat_distribution()