In [2]:
# Parameters.
# config_dir: prefix of the Hydra config_path built in the next cell.
# exp_name:   "<experiment directory>/<exp variant>"; the first path component selects
#             the config directory and the last one becomes the `exp=` override.
config_dir = "../experiments"
exp_name = "091_cloud_wather_then_q23/base_all"  # leftover alternative value (looks truncated): "072_save_input_valid/base_all_save

In [3]:
# Switch the kernel's working directory; the relative "output/..." and "input/..."
# paths used later in this cell are resolved from here.
%cd /kaggle/working

from pathlib import Path

from hydra import compose, initialize
from omegaconf import OmegaConf

# exp_name is "<experiment directory>/<variant>": the first component picks the
# Hydra config directory, the last component is passed as the `exp` override.
exp_name_parts = exp_name.split("/")
hydra_config_path = f"{config_dir}/{exp_name_parts[0]}"
exp_override = f"exp={exp_name_parts[-1]}"

# Compose the experiment configuration (including the hydra section itself).
with initialize(version_base=None, config_path=hydra_config_path):
    cfg = compose(
        config_name="config.yaml",
        overrides=[exp_override],
        return_hydra_config=True,
    )
import pickle

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import seaborn as sns

# Constant definitions.
# output_dir: local directory of this experiment's outputs (relative to the working directory).
# gcs_path:   GCS prefix for the same experiment; ends with "/" so file names can be appended directly.
output_dir = Path(f"output/experiments/{exp_name}")
gcs_path = f"gs://{cfg.dir.gcs_bucket}/{cfg.dir.gcs_base_dir}/experiments/{exp_name}/"

# Column definitions.

# Targets that contribute a single column each.
single_targets = [
    "cam_out_NETSW",
    "cam_out_FLWDS",
    "cam_out_PRECSC",
    "cam_out_PRECC",
    "cam_out_SOLS",
    "cam_out_SOLL",
    "cam_out_SOLSD",
    "cam_out_SOLLD",
]

# Targets that are expanded into 60 indexed columns each
# (presumably one per vertical level -- TODO confirm).
seq_targets = [
    "ptend_t",
    "ptend_q0001",
    "ptend_q0002",
    "ptend_q0003",
    "ptend_u",
    "ptend_v",
]

# Flat column list: "<seq_target>_<0..59>" for every sequence target, in order,
# followed by the single-column targets.
target_columns = [
    f"{seq_name}_{level}" for seq_name in seq_targets for level in range(60)
]
target_columns += single_targets


# Load saved results and related inputs.

# Per-column R2 scores written earlier for this experiment.
# The file is opened in a `with` block so the handle is closed right after loading
# (the previous `pickle.load(open(...))` left it open until garbage collection).
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
with open(output_dir / "r2_score_dict.pkl", "rb") as r2_file:
    r2_score_dict = pickle.load(r2_file)

# Only the first row of the sample submission is read; its values (all columns
# except "sample_id") are used below as per-column multipliers.
ss_df = pl.read_csv(
    "input/leap-atmospheric-physics-ai-climsim/sample_submission.csv", n_rows=1
)
weight_columns = [x for x in ss_df.columns if x != "sample_id"]
weight_array = ss_df.select(weight_columns).to_numpy()[0]

/kaggle/working


In [31]:
# Upper bound on the number of rows read from each parquet file.
n_rows = 500000


def _read_head(progress_message, file_name):
    """Print a progress message, then read the first `n_rows` rows of a parquet file under `gcs_path`."""
    print(progress_message)
    return pl.read_parquet(gcs_path + file_name, retries=5, n_rows=n_rows)


# The three frames are read in the same order as before.
original_xs_df = _read_head("read original_xs", "original_xs.parquet")
predict_df = _read_head("read predict", "predict.parquet")
label_df = _read_head("read label", "label.parquet")

read original_xs
read predict
read label


In [32]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import seaborn as sns

from utils.metric import score

In [33]:
# Plain evaluation of the saved predictions.
# The first column of each frame is skipped (presumably the sample id -- TODO confirm).
preds = predict_df[:, 1:].to_numpy()
original_xs = original_xs_df[:, 1:].to_numpy()

# For every column listed in cfg.exp.fill_target, overwrite the model output with
# the original value divided by -1200.
# NOTE(review): presumably the tendency that drives the quantity to zero over a
# 1200 s step -- confirm against the training code.
for col in cfg.exp.fill_target:
    col_index = cfg.cols.col_names.index(col)
    preds[:, col_index] = original_xs[:, col_index] / (-1200)

# Both frames get integer column labels 0..n-1; reset_index() adds the "index"
# column that is passed to score() as the row-id column name.
column_ids = list(range(preds.shape[1]))
predict_weight_df = pd.DataFrame(preds * weight_array, columns=column_ids).reset_index()
label_weight_df = pd.DataFrame(
    label_df[:, 1:].to_numpy() * weight_array, columns=column_ids
).reset_index()

# One R2 value per column.
r2_scores = score(
    label_weight_df,
    predict_weight_df,
    "index",
    multioutput="raw_values",
)

# Map column name -> R2 and report the unweighted mean over all columns.
r2_score_dict_const = dict(zip(cfg.cols.col_names, r2_scores))
print("r2: ", np.mean(list(r2_score_dict_const.values())))

r2:  0.7441657130805308


## 明らかにバランスがおかしい snow rate を修正する


In [44]:

# Index within each 60-column group that is inspected here
# (presumably a vertical level -- TODO confirm).
h = 40
temp = original_xs_df[:, h + 1]

# Column positions of the two quantities combined below.
# NOTE(review): with column 0 skipped and 60 columns per sequence target, offsets
# 121 and 181 would line up with ptend_q0002 and ptend_q0003 -- confirm against
# cfg.cols.col_names.
q2_col = 121 + h
q3_col = 181 + h

# Value after the step: original value + label * 1200.
true_q2 = original_xs_df[:, q2_col] + label_df[:, q2_col] * 1200
true_q3 = original_xs_df[:, q3_col] + label_df[:, q3_col] * 1200

# Share of q3 in the q2 + q3 total; the tiny epsilon avoids 0/0 when both are zero.
true_cloud_water = true_q2 + true_q3
true_snow_rate = true_q3 / (true_cloud_water + 1e-60)

In [47]:
# Boolean mask of samples whose snow rate is strictly positive.
# NOTE(review): the displayed result contains rates as small as ~1e-20, so this mask
# also admits values that may be floating-point residue -- consider a small threshold.
non_zero_index = true_snow_rate > 0

# Show the selected snow rates next to `temp` for the same samples.
true_snow_rate.to_numpy()[non_zero_index], temp.to_numpy()[non_zero_index]

(array([1.31612246e-01, 3.77944354e-20, 1.48761297e-01, ...,
        2.87214262e-02, 1.00000000e+00, 1.00000000e+00]),
 array([270.48316061, 275.85696992, 270.41081691, ..., 272.98562252,
        276.37338546, 275.23698443]))

In [48]:
# Display true_q2 for the samples selected by non_zero_index (inspection only).
true_q2.to_numpy()[non_zero_index]

array([1.33427803e-05, 4.37726138e-05, 1.16122697e-07, ...,
       5.69969822e-05, 0.00000000e+00, 0.00000000e+00])

In [49]:
# Display true_q3 for the samples selected by non_zero_index (inspection only).
# NOTE(review): the output shows magnitudes around 1e-24 on rows where true_q2 is 0,
# which yield a snow rate of 1.0; these look like round-off rather than real signal -- confirm.
true_q3.to_numpy()[non_zero_index]

array([2.02222254e-06, 1.65436123e-24, 2.02934418e-08, ...,
       1.68544294e-06, 6.61744490e-24, 6.61744490e-24])