In [2]:
from hydra import initialize, compose

# Compose the "score" Hydra config with the exp007_0 experiment and the
# stratify_fold_0 split overrides applied.
with initialize(version_base=None, config_path="../run/conf"):
    cfg = compose(
        config_name="score",
        overrides=[
            "exp=exp007_0",
            "split=stratify_fold_0",
        ],
    )

In [3]:
import sys, os
# Make the parent directory importable. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)

In [23]:
from pathlib import Path
import numpy as np
import polars as pl
import os
import matplotlib.pyplot as plt


In [5]:
# Directory under the configured processed_dir for one example training series.
series_id = "0a96f4993bd7"
series_dir = Path(cfg.dir.processed_dir).joinpath("train", series_id)

series_dir

PosixPath('/tmp/working/processed/train/0a96f4993bd7')

In [17]:
# Read only the first 3,000,000 rows of the series table, then hand it to pandas.
train_df = (
    pl.read_parquet(Path(cfg.dir.data_dir) / "train_series.parquet", n_rows=3000000)
    .to_pandas()
)
# Event table; rows containing nulls are dropped before converting to pandas.
train_events_df = (
    pl.read_csv(Path(cfg.dir.data_dir) / "train_events.csv")
    .drop_nulls()
    .to_pandas()
)

In [18]:
# Show the distinct series IDs present in the loaded subset of rows.
train_df['series_id'].unique()

array(['038441c925bb', '03d92c9f6f8a', '0402a003dae9', '04f547b8017d',
       '05e1944c3818', '062cae666e2a', '062dbd4c95e6'], dtype=object)

In [26]:
# Seconds in 8 hours divided by 5 -- presumably the number of samples in
# 8 hours at one sample every 5 seconds (TODO confirm the sampling interval).
3600 * 8 /5

5760.0

In [24]:
def plot_series_with_events(series_id, train_series, train_events):
    """Draw the ``enmo`` and ``anglez`` signals of one series with event markers.

    Two figures are produced (``enmo`` first, then ``anglez``). In each one the
    signal is plotted against ``timestamp`` and a dashed vertical line is drawn
    at every ``onset`` (red) and ``wakeup`` (green) event of the same series.

    :param series_id: Value matched against the ``series_id`` column of both frames;
        it is also concatenated onto the figure title.
    :param train_series: Frame indexed by column name with ``series_id``,
        ``timestamp``, ``enmo`` and ``anglez`` columns.
    :param train_events: Frame with ``series_id``, ``event`` and ``timestamp`` columns.
    :return: None; the figures are shown with ``plt.show()``.
    """
    # Restrict both tables to the requested series.
    series_rows = train_series[train_series['series_id'] == series_id]
    event_rows = train_events[train_events['series_id'] == series_id]

    # Timestamps of each event kind, with missing values removed.
    onset_times = event_rows.loc[event_rows['event'] == 'onset', 'timestamp'].dropna()
    wakeup_times = event_rows.loc[event_rows['event'] == 'wakeup', 'timestamp'].dropna()

    def plot_data_and_events(data, ylabel):
        """Plot column ``data`` over time and overlay the event markers."""
        plt.figure(figsize=(20, 3))
        plt.plot(series_rows['timestamp'], series_rows[data], label=data, linewidth=1)

        # Onsets are drawn before wakeups so the legend order stays the same.
        marker_specs = (
            (onset_times, 'r', 'onset'),
            (wakeup_times, 'g', 'wakeup'),
        )
        for moments, colour, tag in marker_specs:
            for moment in moments:
                plt.axvline(x=moment, color=colour, linestyle='--', label=tag)

        # Every vertical line carries a label, so keep only the first handle
        # seen for each label to avoid repeated legend entries.
        handles, labels = plt.gca().get_legend_handles_labels()
        first_handle_by_label = {}
        for handle, label in zip(handles, labels):
            first_handle_by_label.setdefault(label, handle)

        plt.legend(list(first_handle_by_label.values()), list(first_handle_by_label.keys()))
        plt.xlabel('Timestamp')
        plt.ylabel(ylabel)
        plt.title(f'{ylabel} over Time with Event Flags - '+series_id)
        plt.show()

    # One figure per signal, enmo first.
    for column, axis_label in (('enmo', 'ENMO Value'), ('anglez', 'anglez Value')):
        plot_data_and_events(column, axis_label)

In [None]:
# Candidate series IDs to pass as the first argument:
# '038441c925bb', '03d92c9f6f8a', '0402a003dae9', '04f547b8017d', '05e1944c3818', '062cae666e2a'
plot_series_with_events('038441c925bb', train_df, train_events_df)

In [None]:
def downsample(sequence, factor=10):
    """
    Keep every ``factor``-th element of ``sequence``, starting with the first.

    :param sequence: Any object that supports extended slicing
    :param factor: Step between kept elements (default 10)
    :return: ``sequence`` sliced with a step of ``factor``
    """
    stride = slice(None, None, factor)
    return sequence[stride]

def resize_1d_array(array, new_size):
    """
    Stretch or shrink a 1D array to ``new_size`` samples by linear interpolation.

    The first and last output samples coincide with the first and last input
    samples; the remaining outputs are evenly spaced between them.

    :param array: 1D numpy array
    :param new_size: Number of samples in the result
    :return: Interpolated array of length ``new_size``
    """
    source_length = len(array)
    # Fractional positions, in input-index units, at which to sample.
    target_positions = np.linspace(0, source_length - 1, new_size)
    source_positions = np.arange(source_length)
    return np.interp(target_positions, source_positions, array)

def detect_periodicity(
    seq: np.ndarray,
    downsample_rate: int = 120,
    split_hour: int = 8,
    threshold: float = 0.99,
) -> np.ndarray:
    """Flag the parts of ``seq`` that closely repeat another part of the same sequence.

    The sequence is downsampled, cut into chunks of ``split_hour`` hours and every
    chunk is compared with every other chunk by cosine similarity. A chunk is
    flagged when its best match with a *different* chunk exceeds ``threshold``.
    The per-chunk flags are then expanded back to the length of ``seq``.

    :param seq: 1D sequence of samples. The step count per hour is computed as
        ``3600 // 5``, i.e. the code assumes one sample every 5 seconds
        (TODO confirm against the data source).
    :param downsample_rate: Keep every ``downsample_rate``-th sample before chunking.
    :param split_hour: Chunk length in hours; should be a divisor of 24.
    :param threshold: Cosine similarity above which a chunk counts as a repeat.
    :return: Float array of length ``len(seq)``. Values are 1.0 inside flagged
        chunks and 0.0 elsewhere, with linearly interpolated values where a
        flagged chunk borders an unflagged one.
    :raises ValueError: If ``downsample_rate`` is smaller than 1 or so large that
        a chunk would contain no samples.
    """
    if downsample_rate < 1:
        raise ValueError("downsample_rate must be >= 1")

    seq = np.asarray(seq)
    if len(seq) == 0:
        # Nothing to compare; the similarity matrix would be empty.
        return np.zeros(0, dtype=float)

    steps_per_hour = 3600 // 5
    # Number of downsampled samples in one chunk.
    split_step = split_hour * steps_per_hour // downsample_rate
    if split_step < 1:
        raise ValueError("downsample_rate is too large for the requested split_hour")

    # Downsample the sequence.
    seq_downsampled = downsample(seq, downsample_rate)

    # Cut into chunks of shape (chunk_num, split_step); the tail of the last
    # chunk is zero-padded when the length is not a multiple of split_step.
    chunk_num = -(-len(seq_downsampled) // split_step)  # ceiling division
    seq_downsampled_padded = np.zeros(chunk_num * split_step)
    seq_downsampled_padded[:len(seq_downsampled)] = seq_downsampled
    chunks = seq_downsampled_padded.reshape(chunk_num, split_step)

    # Normalise each chunk so that the matrix product gives cosine similarities.
    # An all-zero chunk has norm 0; dividing by 1 instead keeps it a zero vector
    # (similarity 0 to everything) rather than producing NaN, which would
    # otherwise spread to every column maximum and clear all flags.
    norms = np.linalg.norm(chunks, axis=1, keepdims=True)
    norm_vecs = chunks / np.where(norms == 0, 1.0, norms)
    cosine_sim_matrix = np.dot(norm_vecs, norm_vecs.T)

    # Ignore the similarity of a chunk with itself, then take the best match.
    np.fill_diagonal(cosine_sim_matrix, 0)
    pred_chunk = cosine_sim_matrix.max(axis=0) > threshold

    # The last chunk may be zero-padded, so its own score is unreliable: it
    # inherits the flag of the chunk before it. With a single chunk there is
    # no neighbour to copy from, so its flag is left as computed.
    if len(pred_chunk) >= 2:
        pred_chunk[-1] = pred_chunk[-2]

    # Expand the chunk flags to the downsampled length, then to the original length.
    pred_downsampled = pred_chunk.repeat(split_step)[:len(seq_downsampled)].astype(float)
    pred = resize_1d_array(pred_downsampled, len(seq))
    return pred

