In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import stumpy
import plotly.express as px
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from var import DATA_IN, DATA_OUT, START_DATE, END_DATE
from src.io import read_time_series
from src.preprocess import resample_time_series

In [None]:
# Load the TID catalog CSV through the project reader, naming its columns
# explicitly (presumably the file has no header row — TODO confirm).
df_tid = read_time_series(
    Path(DATA_IN) / 'TID_catalog.csv',
    column_names=[
        'duration', 'period', 'amplitude', 'spectral_contribution',
        'velocity', 'azimuth', 'quality_index', 'datetime',
    ],
)

In [None]:
# Keep only catalog rows with quality_index >= 0.6, then aggregate them into
# 6-hour bins using the per-bin maximum.
df_tid_res = resample_time_series(
    df_tid.loc[df_tid['quality_index'] >= 0.6],
    time_interval='6H',
    aggregation_function='max',
)

In [None]:
# Non-null values per column after resampling.
# The original note here reads "324" — presumably the count seen in an earlier run.
df_tid_res.count()

In [None]:
# Binary flag per resampled bin: 1 when the bin has a quality_index value
# (i.e. at least one retained catalog row fell into it), 0 otherwise.
df_tid_res['is_tid'] = df_tid_res['quality_index'].notna().astype(int)

In [None]:
# Autocorrelation of the binary TID flag up to 41 lags (bins are 6 hours wide,
# so this covers roughly ten days). The trailing ';' hides the figure repr.
plot_acf(df_tid_res['is_tid'], lags=41);

## Motifs

In [None]:
# Load the previously saved dataset from the project output folder.
# NOTE(review): pickle files must come from a trusted source.
df = pd.read_pickle(Path(DATA_OUT) / 'df_dataset.pickle')

In [None]:
# ANNO: row key used with `df.loc[...]` to pick the period to analyse
#       (presumably a year on a datetime index — TODO confirm).
# FEATURE: name of the column analysed in the motif search.
ANNO, FEATURE = '2020', 'iu_fix'

In [None]:
# Subsequence length for the matrix profile: twice `n_hours` samples
# (presumably two samples per hour, i.e. a 6-hour window — TODO confirm).
n_hours = 6
window_size = n_hours * 2

# Matrix profile of the selected feature over the selected period.
mp = stumpy.stump(df.loc[ANNO, FEATURE], m=window_size)

In [None]:
# Quick interactive look at the first column of the stump output (the matrix profile).
px.line(mp[:, 0])

In [None]:
# Wrap the stump output in a labelled frame: column 0 is the matrix profile,
# columns 1-3 are the (overall / left / right) nearest-neighbour indices.
# stumpy returns an object-dtype array, so cast each column explicitly;
# otherwise quantile, sorting and plotting would run on `object` columns.
df_mp = pd.DataFrame(
    mp,
    columns=[
        'profile',
        'profile idx',
        'left profile idx',
        'right profile idx',
    ],
).astype(
    {
        'profile': 'float64',
        'profile idx': 'int64',
        'left profile idx': 'int64',
        'right profile idx': 'int64',
    }
)

In [None]:
# Positions whose matrix-profile value lies in the lowest 0.5% — the
# subsequences with the closest match elsewhere in the series.
best_motif_idx = df_mp.loc[
    df_mp['profile'] <= df_mp['profile'].quantile(0.005)
].index

In [None]:
# y: the analysed feature over the selected period;
# x: its 0-based positional index, comparable with the matrix-profile positions.
y = df.loc[ANNO, FEATURE]
x = df.loc[ANNO].reset_index().index

In [None]:
profile = df_mp['profile']

# Top panel: the raw feature; bottom panel: its matrix profile (shared x axis,
# both expressed in sample positions).
fig, ax = plt.subplots(2, figsize=(16,8), sharex=True)
ax[0].plot(x, y)
ax[0].set_ylabel(FEATURE)
ax[1].plot(profile.index, profile)
ax[1].set_ylabel('matrix profile')
ax[1].set_xlabel('sample position')

for idx in best_motif_idx:
    # Mark where each best motif starts on both panels.
    ax[0].axvline(x=idx, color="green")
    ax[1].axvline(x=idx, color="green")
    # Shade the full motif window. axvspan covers the whole axes height, so
    # the highlight no longer depends on a hard-coded value range (0..40)
    # that only fits one particular feature.
    ax[0].axvspan(idx, idx + window_size, facecolor="lightgrey")

In [None]:
# Rank of the first of the two candidates to inspect (0-based, by ascending profile).
N = 6

# Among positions in the lowest 5% of the matrix profile, order by profile
# value and take the two positions at ranks N and N+1.
setx = (
    df_mp.loc[df_mp['profile'] <= df_mp['profile'].quantile(0.05)]
    .sort_values(by='profile')
    .index[N:N + 2]
)

In [None]:
# Overlay the selected subsequences on a common 0..window_size axis so their
# shapes can be compared directly.
fig, ax = plt.subplots(figsize=(16, 4))

for idx in setx:
    plot_y = list(y.iloc[idx:idx + window_size])
    ax.plot(plot_y)

In [None]:
# Show the dataset rows (including the former index as a column) at the
# selected subsequence start positions.
df.loc[ANNO].reset_index().iloc[setx]

In [None]:
# Two 6-hour windows to highlight (presumably the motif pair located above —
# TODO confirm against the rows shown in the previous cell).
start_1, end_1 = '2020-11-02 15:30', '2020-11-02 21:30'
start_2, end_2 = '2020-11-19 17:00', '2020-11-19 23:00'

fig = px.line(df.loc['2020-11-02':'2020-11-19'][FEATURE])

# Each rectangle spans the full value range of the plotted trace.
y_low = fig.data[0].y.min()
y_high = fig.data[0].y.max()

for x_from, x_to in ((start_1, end_1), (start_2, end_2)):
    fig.add_shape(
        type='rect',
        x0=x_from,
        x1=x_to,
        y0=y_low,
        y1=y_high,
        line=dict(color='red', width=2),
        fillcolor='rgba(255, 0, 0, 0.2)',
    )

# Display the annotated figure as the cell output.
fig

In [None]:
# Distribution of the `tid_within_3h` column over the plotted date range.
df.loc['2020-11-02':'2020-11-19', 'tid_within_3h'].value_counts()