# sleepの出力で後処理も行う

In [1]:
import os
import sys

# Put the parent directory on the import path (presumably so the `src`
# package imported in later cells resolves from this notebook's location).
sys.path.append(os.pardir)

In [5]:
from pathlib import Path
import numpy as np
import polars as pl
import os
from hydra import initialize, compose

# Compose the "cv_score" Hydra config from ../run/conf, overriding the
# experiment name so that the exp013 outputs are the ones evaluated below.
with initialize(config_path="../run/conf", version_base=None):
    cfg = compose(
        config_name="cv_score",
        overrides=["exp_name=exp013"],
    )

In [3]:
# Load the training series and the event labels; in both frames the
# "timestamp" strings are parsed into datetimes (format includes a UTC offset).
train_df = pl.read_parquet(
    Path(cfg.dir.data_dir) / "train_series.parquet"
).with_columns(
    pl.col("timestamp").str.to_datetime("%Y-%m-%dT%H:%M:%S%z")
)
# Rows containing nulls are dropped from the events before parsing timestamps.
event_df = (
    pl.read_csv(Path(cfg.dir.data_dir) / "train_events.csv")
    .drop_nulls()
    .with_columns(pl.col("timestamp").str.to_datetime("%Y-%m-%dT%H:%M:%S%z"))
)

In [18]:
# Load the cross-validation predictions and their keys for every fold,
# then stack the folds along the first axis.
exp_dir = Path(cfg.base_dir) / cfg.exp_name / "cv"

preds_list, keys_list = [], []
for fold in range(cfg.num_fold):
    fold_preds = np.load(exp_dir / f"preds_fold{fold}.npy")
    fold_keys = np.load(exp_dir / f"keys_fold{fold}.npy")
    preds_list.append(fold_preds)
    keys_list.append(fold_keys)

preds = np.concatenate(preds_list, axis=0)
keys = np.concatenate(keys_list, axis=0)

In [19]:
import numpy as np
import polars as pl
from scipy.signal import find_peaks
import logging
from pathlib import Path


def post_process_for_seg_sleep(
    keys: list[str],
    preds_all: np.ndarray,
    score_th: float = 0.01,
    distance: int = 5000,
    periodicity_dict: dict[str, np.ndarray] | None = None,
) -> pl.DataFrame:
    """Make the submission dataframe, attenuating event scores by the sleep output.

    For each series the chunk predictions are flattened along time, the
    onset/wakeup scores (channels 1 and 2) are multiplied by ``1 - sleep``
    (channel 0), and the peaks of the resulting curves are emitted as events.

    Args:
        keys (list[str]): list of keys. key is "{series_id}_{chunk_id}"
        preds_all (np.ndarray): (num_series * num_chunks, duration, C) with
            C >= 3. Channel 0 is used as the sleep score, channels 1 and 2 as
            the onset and wakeup scores.
        score_th (float, optional): minimum peak height. Defaults to 0.01.
        distance (int, optional): minimum distance (in steps) between peaks.
            Defaults to 5000.
        periodicity_dict (dict[str, np.ndarray], optional): maps series_id to
            a 1d array of periodicity predictions whose values are 0 or 1.
            Predictions are truncated to that array's length and zeroed
            wherever it is 1. Defaults to None.

    Returns:
        pl.DataFrame: submission dataframe with columns
            row_id, series_id, step, event, score, sorted by series_id and step.

    Raises:
        ValueError: if ``keys`` is empty (there is no series to report on).
    """
    series_ids = np.array([key.split("_")[0] for key in keys])
    unique_series_ids = np.unique(series_ids)
    if unique_series_ids.size == 0:
        # Previously this case crashed on an unbound loop variable when the
        # dummy row was built; fail early with an explicit message instead.
        raise ValueError("keys must contain at least one entry")

    records = []
    for series_id in unique_series_ids:
        series_idx = np.where(series_ids == series_id)[0]
        # Integer-array indexing returns a copy, so the in-place scaling below
        # never modifies the caller's ``preds_all``.
        series_preds = preds_all[series_idx]
        event_preds = series_preds[:, :, [1, 2]].reshape(-1, 2)
        sleep_preds = series_preds[:, :, 0].reshape(-1, 1)

        if periodicity_dict is not None:
            periodicity = periodicity_dict[series_id]
            num_steps = len(periodicity)
            # Truncate BOTH the event and the sleep scores so the sleep
            # attenuation below still broadcasts against the event scores.
            event_preds = event_preds[:num_steps, :]
            sleep_preds = sleep_preds[:num_steps, :]
            event_preds *= 1 - periodicity[:, None]  # zero out periodic regions

        event_preds *= 1 - sleep_preds  # attenuate by the sleep score

        for i, event_name in enumerate(["onset", "wakeup"]):
            this_event_preds = event_preds[:, i]
            steps = find_peaks(this_event_preds, height=score_th, distance=distance)[0]
            records.extend(
                {
                    "series_id": series_id,
                    "step": step,
                    "event": event_name,
                    "score": score,
                }
                for step, score in zip(steps, this_event_preds[steps])
            )

    if not records:  # no prediction at all: insert a dummy row
        records.append(
            {
                # Same value the original picked up from the finished loop.
                "series_id": unique_series_ids[-1],
                "step": 0,
                "event": "onset",
                "score": 0,
            }
        )

    sub_df = pl.DataFrame(records).sort(by=["series_id", "step"])
    row_ids = pl.Series(name="row_id", values=np.arange(len(sub_df)))
    sub_df = sub_df.with_columns(row_ids).select(["row_id", "series_id", "step", "event", "score"])
    return sub_df


In [20]:
from src.utils.metrics import event_detection_ap
from src.utils.post_process import post_process_for_seg

# Baseline: run the repository's standard post-processing on the same
# predictions and score the result against the labelled events.
submission_df = post_process_for_seg(
    keys, preds,
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    periodicity_dict=None,
)
# The metric is called on pandas frames, so both polars frames are converted.
score = event_detection_ap(event_df.to_pandas(), submission_df.to_pandas())
score  # bare expression: displayed as the cell output

0.0019577547290630347

In [15]:
from src.utils.metrics import event_detection_ap

# Sleep-aware variant: same predictions and thresholds, but post-processed
# with post_process_for_seg_sleep defined in this notebook.
submission_df = post_process_for_seg_sleep(
    keys=keys,
    preds_all=preds,
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    periodicity_dict=None,
)
# The metric is called on pandas frames, so both polars frames are converted.
score = event_detection_ap(event_df.to_pandas(), submission_df.to_pandas())
score  # bare expression: displayed as the cell output

0.6853890686877171