In [4]:
# parameters
config_dir = "../experiments"
exp_name = "902_optuna/002"

In [31]:
%cd /kaggle/working

from pathlib import Path

from hydra import compose, initialize
from omegaconf import OmegaConf

with initialize(
    version_base=None, config_path=f"{config_dir}/{exp_name.split('/')[0]}"
):
    cfg = compose(
        config_name="config.yaml",
        overrides=[f"exp={exp_name.split('/')[-1]}"],
        return_hydra_config=True,
    )
import pickle

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import seaborn as sns

# 定数定義
output_dir = Path(f"output/experiments/{exp_name}")
gcs_path = f"gs://{cfg.dir.gcs_bucket}/{cfg.dir.gcs_base_dir}/experiments/{exp_name}/"

# カラムの定義
single_targets = [
    "cam_out_NETSW",
    "cam_out_FLWDS",
    "cam_out_PRECSC",
    "cam_out_PRECC",
    "cam_out_SOLS",
    "cam_out_SOLL",
    "cam_out_SOLSD",
    "cam_out_SOLLD",
]
seq_targets = [
    "ptend_t",
    "ptend_q0001",
    "ptend_q0002",
    "ptend_q0003",
    "ptend_u",
    "ptend_v",
]
target_columns = []
for col in seq_targets:
    for i in range(60):
        target_columns.append(f"{col}_{i}")
target_columns.extend(single_targets)

/kaggle/working


In [32]:
kami_sub = pl.read_parquet(gcs_path + "submission.parquet", retries=5)

In [33]:
takoi_sub = pl.read_parquet(
    "gs://kaggle-leap/kami/ex123_124_130_131_133_134_135_ensemble.parquet"
)

In [42]:
import pandas as pd

from utils.metric import score

preds = kami_sub[:, 1:].to_numpy()
labels = takoi_sub[:, 1:].to_numpy()

_predict_df = pd.DataFrame(
    preds, columns=[i for i in range(preds.shape[1])]
).reset_index()
_label_df = pd.DataFrame(
    labels, columns=[i for i in range(labels.shape[1])]
).reset_index()
r2_scores = score(_label_df, _predict_df, "index", multioutput="raw_values")

In [43]:
r2_score_dict = {
    col: r2 for col, r2 in dict(zip(cfg.cols.col_names, r2_scores)).items()
}

In [47]:
r2_score_dict

{'ptend_t_0': 0.9141481664718865,
 'ptend_t_1': 0.8338239811469996,
 'ptend_t_2': 0.9904719785565397,
 'ptend_t_3': 0.9979960918785808,
 'ptend_t_4': 0.9994059881507118,
 'ptend_t_5': 0.9994254608151154,
 'ptend_t_6': 0.9994249923283972,
 'ptend_t_7': 0.999377649115782,
 'ptend_t_8': 0.99927737157223,
 'ptend_t_9': 0.9991855973705818,
 'ptend_t_10': 0.9992017471544716,
 'ptend_t_11': 0.9991347996139884,
 'ptend_t_12': 0.9989703665729264,
 'ptend_t_13': 0.9988767239075806,
 'ptend_t_14': 0.9987580775393252,
 'ptend_t_15': 0.9984540180315007,
 'ptend_t_16': 0.9887781387672581,
 'ptend_t_17': 0.8341328449244259,
 'ptend_t_18': 0.940057927621045,
 'ptend_t_19': 0.9491901126018945,
 'ptend_t_20': 0.9565306717354142,
 'ptend_t_21': 0.971304818449109,
 'ptend_t_22': 0.9765998012679793,
 'ptend_t_23': 0.9824670538041873,
 'ptend_t_24': 0.9861448522565806,
 'ptend_t_25': 0.9901330395359538,
 'ptend_t_26': 0.9923753873250898,
 'ptend_t_27': 0.9942539216696782,
 'ptend_t_28': 0.9952074908147106,


In [36]:
for key, val in r2_score_dict.items():
    if val < 0.9:
        print(key, val)

ptend_t_1 0.8338239811469996
ptend_t_17 0.8341328449244259
ptend_q0001_14 0.7142419565917002
ptend_q0001_15 -0.39195517364672283
ptend_q0001_16 0.798761040496495
ptend_q0001_17 0.539621639601469
ptend_q0001_18 0.7226863383299738
ptend_q0002_28 0.03544894205125815
ptend_q0003_15 -1.3326470718960994
ptend_q0003_16 0.18885631697047534
ptend_q0003_17 0.5584758708186639
ptend_q0003_18 0.7606862424670793
ptend_q0003_28 0.7546021777542491
ptend_q0003_29 0.832351630527032


In [51]:
fill_kami_sub = kami_sub.with_columns(
    takoi_sub[
        [
            "ptend_t_1",
            "ptend_t_17",
            "ptend_q0001_14",
            "ptend_q0001_15",
            "ptend_q0001_16",
            "ptend_q0001_17",
            "ptend_q0001_18",
            "ptend_q0002_28",
            "ptend_q0003_15",
            "ptend_q0003_16",
            "ptend_q0003_17",
            "ptend_q0003_18",
            "ptend_q0003_28",
            "ptend_q0003_29",
        ]
    ],
)

In [39]:
fill_kami_sub.write_parquet("output/fill_902_optuna_002.parquet")

In [52]:
import pandas as pd

from utils.metric import score

preds = fill_kami_sub[:, 1:].to_numpy()
labels = takoi_sub[:, 1:].to_numpy()

_predict_df = pd.DataFrame(
    preds, columns=[i for i in range(preds.shape[1])]
).reset_index()
_label_df = pd.DataFrame(
    labels, columns=[i for i in range(labels.shape[1])]
).reset_index()
r2_scores = score(_label_df, _predict_df, "index", multioutput="raw_values")

for key, val in r2_score_dict.items():
    if val < 0.9:
        print(key, val)

ptend_t_1 0.8338239811469996
ptend_t_17 0.8341328449244259
ptend_q0001_14 0.7142419565917002
ptend_q0001_15 -0.39195517364672283
ptend_q0001_16 0.798761040496495
ptend_q0001_17 0.539621639601469
ptend_q0001_18 0.7226863383299738
ptend_q0002_28 0.03544894205125815
ptend_q0003_15 -1.3326470718960994
ptend_q0003_16 0.18885631697047534
ptend_q0003_17 0.5584758708186639
ptend_q0003_18 0.7606862424670793
ptend_q0003_28 0.7546021777542491
ptend_q0003_29 0.832351630527032
