## Import

In [9]:
from mlprimitives.custom.timeseries_preprocessing import time_segments_aggregate
from mlprimitives.custom.timeseries_preprocessing import rolling_window_sequences
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from notebooks.tulog.model import hyperparameters
from orion.primitives.tadgan import TadGAN
from orion.data import load_signal, load_anomalies
from orion import Orion
from notebooks.tulog.utils import plot, plot_ts, plot_rws, plot_error, unroll_ts

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
import os

warnings.filterwarnings(action='ignore')

## Load data

In [10]:
data_path = "C:\\Users\\PC\\OneDrive\\문서\\GitHub\\datasets\\"

#Read data
data = pd.read_csv(data_path + "Bearing1_1_top5_result.csv")
data.shape

(2803, 2)

## Nomalization

In [11]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()

x = data.values
x_scaled = min_max_scaler.fit_transform(x)
scaled_data = pd.DataFrame(x_scaled)

In [12]:
scaled_data.shape

(2803, 2)

In [13]:
scaled_data = scaled_data[0]

### Windows

In [14]:
window_size=100
windows_normal=scaled_data.values[np.arange(window_size)[None, :] + np.arange(scaled_data.shape[0]-window_size)[:, None]]
windows_normal = np.expand_dims(windows_normal,2)

In [15]:
windows_normal_train = windows_normal[:400]
windows_normal_test = windows_normal[400:]

In [16]:
windows_normal_train.shape

(400, 100, 1)

In [17]:
windows_normal_test.shape

(2303, 100, 1)

## Modeling

* modeling

In [19]:
hyperparameters["epochs"] = 100
hyperparameters["shape"] = (100, 1) # based on the window size
hyperparameters["optimizer"] = "keras.optimizers.Adam"
hyperparameters["learning_rate"] = 0.0005
hyperparameters["latent_dim"] = 20
hyperparameters["batch_size"] = 64

tgan = TadGAN(**hyperparameters)
tgan.fit(windows_normal_test)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch: 1/100, [Dx loss: [ 2.4001782  -0.0377054   0.04182352  0.23960601]] [Dz loss: [ 0.4529787  -0.9207913   0.49542984  0.087834  ]] [G loss: [-0.452523   -0.03832104 -0.45670325  0.00425013]]
Epoch: 2/100, [Dx loss: [ 0.12405848  0.18915579 -0.22797376  0.01628765]] [Dz loss: [-2.2815044  -2.3975399  -1.1813706   0.12974064]] [G loss: [3.3663974  0.33775377 2.7824206  0.0246223 ]]
Epoch: 3/100, [Dx loss: [ 0.28016943  0.7526541  -0.52839875  0.00559141]] [Dz loss: [-2.2626667  -2.111272   -2.4187071   0.22673127]] [G loss: [3.1699412  0.48916006 2.5510538  0.01297276]]
Epoch: 4/100, [Dx loss: [-0.41378355 -1.871019    1.3598007   0.00974345]] [Dz loss: [-0.6308606 -2.2960055  1.4066397  0.0258505]] [G loss: [-2.5742316  -1.3963258  -1.2970697   0.01191639]]
Epoch: 5/100, [Dx loss: [ 0.35460815 -0.9531062   1.1601387   0.01475757]] [Dz loss: [-2.6373913  -3.9523795  -0.40473077  0.17197192

In [None]:
# reconstruct
X_hat, critic = tgan.predict(X)

# visualize X_hat
plot_rws(X_hat)

In [None]:
# flatten the predicted windows
y_hat = unroll_ts(X_hat)

# plot the time series
plot_ts([y, y_hat], labels=['original', 'reconstructed'])

In [None]:
# pair-wise error calculation
error = np.zeros(shape=y.shape)
length = y.shape[0]
for i in range(length):
    error[i] = abs(y_hat[i] - y[i])

# visualize the error curve
fig = plt.figure(figsize=(30, 3))
plt.plot(error)
plt.show()

In [None]:
from orion.primitives.tadgan import score_anomalies

error, true_index, true, pred = score_anomalies(X, X_hat, critic, X_index, rec_error_type="dtw", comb="mult")
pred = np.array(pred).mean(axis=2)

# visualize the error curve
plot_error([[true, pred], error])

In [None]:
# threshold
thresh = 10

intervals = list()

i = 0
max_start = len(error)
while i < max_start:
    j = i
    start = index[i]
    while error[i] > thresh:
        i += 1
    
    end = index[i]
    if start != end:
        intervals.append((start, end, np.mean(error[j: i+1])))
        
    i += 1
        
intervals

In [None]:
import pandas as pd
anomalies = pd.DataFrame(intervals, columns=['start', 'end', 'score'])
plot(df, [anomalies, known_anomalies])

* iteration

In [10]:
hyperparameters["epochs"] = 100
hyperparameters["shape"] = (100, 1) # based on the window size
hyperparameters["optimizer"] = "keras.optimizers.Adam"
hyperparameters["learning_rate"] = 0.0005
hyperparameters["latent_dim"] = 20
hyperparameters["batch_size"] = 64

# 0-1 scaling -> change activation function of output layers to sigmoid (default: tanh)
hyperparameters['layers_generator'][7]['parameters']['activation'] = 'sigmoid'

In [13]:
def recons_error_plot(y, y_hat, save_name) :
    # pair-wise error calculation
    error = np.zeros(shape=y.shape)
    length = y.shape[0]
    for i in range(length):
        error[i] = abs(y_hat[i] - y[i])

    # visualize the error curve
    fig = plt.figure(figsize=(30, 3))
    plt.plot(error)
    plt.savefig('{}.png'.format(save_name), dpi=300)
    plt.show()

In [None]:
from orion.evaluation.contextual import contextual_accuracy, contextual_f1_score
from orion.primitives.tadgan import score_anomalies
import pandas as pd

for idx in range(1, 20) :
    tadgan = TadGAN(**hyperparameters)
    tadgan.fit(X)
    # reconstruct
    X_hat, critic = tadgan.predict(X)

    # visualize X_hat
    plot_rws(X_hat, save_opt= True, save_name = plt_save_path+"x_hat_plot_rws_{}".format(idx))
    # flatten the predicted windows
    y_hat = unroll_ts(X_hat)

    # plot the time series
    plot_ts([y, y_hat], labels=['original', 'reconstructed'], save_opt= True, save_name = plt_save_path+"plot_ts_{}".format(idx))
    recons_error_plot(y, y_hat, save_name = plt_save_path+"recons_error_{}".format(idx))

    error, true_index, true, pred = score_anomalies(X, X_hat, critic, X_index, rec_error_type="dtw", comb="mult")
    pred = np.array(pred).mean(axis=2)

    # visualize the error curve
    plot_error([[true, pred], error], save_opt= True, save_name = plt_save_path+"plot_error_{}".format(idx))

    # threshold
    thresh = 10

    intervals = list()

    i = 0
    max_start = len(error)
    while i < max_start:
        j = i
        start = index[i]
        while error[i] > thresh:
            i += 1

        end = index[i]
        if start != end:
            intervals.append((start, end, np.mean(error[j: i+1])))

        i += 1

    anomalies = pd.DataFrame(intervals, columns=['start', 'end', 'score'])
    ground_truth = load_anomalies(signal)
    accuracy = contextual_accuracy(ground_truth, anomalies, start=start, end=end)
    f1_score = contextual_f1_score(ground_truth, anomalies, start=start, end=end)
    plot(df, [anomalies, known_anomalies], save_opt= True, save_name = plt_save_path+"plot_{}".format(idx), acc = accuracy, f1=f1_score)
    anomalies.to_csv(plt_save_path + "anomalies_{}.csv".format(idx), index = False)

* 참고: https://ichi.pro/ko/sigyeyeol-isang-tamji-dib-leoning-sidae-264035144704586