In [1]:
%cd /kaggle/working

import os

from hydra import compose, initialize
from omegaconf import OmegaConf

with initialize(version_base=None, config_path="../experiments/032_feature_channel"):
    cfg = compose(
        config_name="config.yaml", overrides=["debug=True"], return_hydra_config=True
    )

/kaggle/working


In [52]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import seaborn as sns

warnings.filterwarnings("ignore", "is_categorical_dtype")
warnings.filterwarnings("ignore", "use_inf_as_na")
pl.Config.set_tbl_cols(-1)
pl.Config.set_tbl_rows(100)

polars.config.Config

In [120]:
df = pl.read_parquet("input/train.parquet", n_rows=100000)

In [5]:
import xarray as xr

grid_path = "/kaggle/working/misc/grid_info/ClimSim_low-res_grid-info.nc"
grid_info = xr.open_dataset(grid_path)
hyam = grid_info["hyam"].to_numpy()
hybm = grid_info["hybm"].to_numpy()
hyai = grid_info["hyai"].to_numpy()
hybi = grid_info["hybi"].to_numpy()

p0 = 1e5
ps = df["state_ps"].to_numpy()
pressures_array = hyam * p0 + hybm[None, :] * ps[:, None]

pressures_array[0]

array([7.83478113e+00, 1.41108318e+01, 2.52923297e+01, 4.49250635e+01,
       7.86346161e+01, 1.34735576e+02, 2.24477729e+02, 3.61643148e+02,
       5.61583643e+02, 8.40325322e+02, 1.21444894e+03, 1.70168280e+03,
       2.32107981e+03, 3.09143463e+03, 4.02775807e+03, 5.13746323e+03,
       6.41892284e+03, 7.86396576e+03, 9.46300920e+03, 1.12091274e+04,
       1.30977804e+04, 1.51221318e+04, 1.72683435e+04, 1.95265236e+04,
       2.18653833e+04, 2.42423874e+04, 2.66410575e+04, 2.90605803e+04,
       3.15131516e+04, 3.40169391e+04, 3.65888174e+04, 3.92401190e+04,
       4.19760536e+04, 4.47970440e+04, 4.76993790e+04, 5.06735901e+04,
       5.37010959e+04, 5.67515928e+04, 5.97841201e+04, 6.27531457e+04,
       6.56180947e+04, 6.83522047e+04, 7.09463085e+04, 7.34056360e+04,
       7.57415383e+04, 7.79627207e+04, 8.00704969e+04, 8.20599900e+04,
       8.39258449e+04, 8.56688208e+04, 8.72996849e+04, 8.88387642e+04,
       9.03119580e+04, 9.17455140e+04, 9.31618329e+04, 9.45774558e+04,
      

## 変化量そのものを確認

In [134]:
h = 45

tmp = df
tmp = tmp.with_columns(
    [
        (pl.col(f"state_q0002_{h}") + pl.col(f"state_q0003_{h}")).alias(
            f"state_cloud_water_{h}"
        ),
        (pl.col(f"ptend_q0002_{h}") + pl.col(f"ptend_q0003_{h}")).alias(
            f"ptend_cloud_water_{h}"
        ),
        (
            pl.col(f"state_q0003_{h}")
            / (pl.col(f"state_q0002_{h}") + pl.col(f"state_q0003_{h}") + 1e-60)
        ).alias("state_ice_ratio"),
        (
            pl.col(f"ptend_q0003_{h}")
            / (pl.col(f"ptend_q0002_{h}") + pl.col(f"ptend_q0003_{h}") + 1e-60)
        ).alias("ptend_ice_ratio"),
    ]
)

cols = [
    f"state_t_{h}",
    f"state_cloud_water_{h}",
    f"ptend_cloud_water_{h}",
    "state_ice_ratio",
    "ptend_ice_ratio",
    f"state_q0002_{h}",
    f"ptend_q0002_{h}",
    f"state_q0003_{h}",
    f"ptend_q0003_{h}",
]

tmp[cols]

state_t_45,state_cloud_water_45,ptend_cloud_water_45,state_ice_ratio,ptend_ice_ratio,state_q0002_45,ptend_q0002_45,state_q0003_45,ptend_q0003_45
f64,f64,f64,f64,f64,f64,f64,f64,f64
279.949099,0.000005,-4.1578e-9,0.007344,0.007344,0.000005,-4.1273e-9,3.6643e-8,-3.0536e-11
277.214222,0.000012,-1.0135e-8,0.031372,0.031372,0.000012,-9.8171e-9,3.8155e-7,-3.1796e-10
282.913622,0.00006,-4.6361e-9,0.0,-0.0,0.00006,-4.6361e-9,0.0,0.0
282.804401,0.000081,2.2404e-8,0.0,0.0,0.000081,2.2404e-8,0.0,0.0
275.264212,0.000004,-3.3230e-9,0.022292,0.022292,0.000004,-3.2489e-9,8.8890e-8,-7.4075e-11
275.169738,0.000041,8.2949e-9,0.003247,-0.013515,0.000041,8.4070e-9,1.3452e-7,-1.1210e-10
283.440614,0.000013,4.9590e-10,0.0,0.0,0.000013,4.9590e-10,0.0,0.0
283.236308,0.000028,6.7274e-9,0.0,0.0,0.000028,6.7274e-9,0.0,0.0
277.894434,0.000053,9.7034e-9,0.022356,-0.102559,0.000052,1.0699e-8,0.000001,-9.9516e-10
279.495795,0.000004,-3.3172e-9,0.049773,0.049773,0.000004,-3.1521e-9,1.9813e-7,-1.6511e-10


In [131]:
tmp.filter(pl.col(f"ptend_q0002_{h}") == 0)[cols].describe()

state_t_45,cloud_water_45,state_q0002_45,ptend_q0002_45,state_q0003_45,ptend_q0003_45
f64,f64,f64,f64,f64,f64
287.028379,5.7e-05,5.7e-05,-3.0839e-08,8.7093e-12,-7.2578e-15
286.00772,0.000136,0.000136,2.362e-08,0.0,0.0
285.577576,3.1e-05,3.1e-05,6.8245e-09,0.0,0.0
286.125538,8.6e-05,8.6e-05,2.5643e-08,2.89e-11,-2.4083e-14
286.05596,1.6e-05,1.6e-05,-1.183e-08,1.1472e-12,-9.5599e-16


## 変化量を足して変化後の値を元に検証

In [67]:
tmelt = 273.15

tmax_fice = tmelt - 10.0  #! max temperature for cloud ice formation
tmin_fice = tmax_fice - 30.0  #! min temperature for cloud ice formation
tmax_fsnow = tmelt  #! max temperature for transition to convective snow
tmin_fsnow = tmelt - 5.0  #! min temperature for transition to convective snow

In [121]:
h = 30

tmp = df
tmp = (
    tmp.with_columns(
        [
            (pl.col(f"state_t_{h}") + 1200.0 * pl.col(f"ptend_t_{h}")).alias(
                f"new_t_{h}"
            ),
            (pl.col(f"state_q0002_{h}") + 1200.0 * pl.col(f"ptend_q0002_{h}")).alias(
                f"new_q0002_{h}"
            ),
            (pl.col(f"state_q0003_{h}") + 1200.0 * pl.col(f"ptend_q0003_{h}")).alias(
                f"new_q0003_{h}"
            ),
        ]
    )
    .with_columns(
        # 絶対値が元の値の絶対値以下なら０埋めする
        [
            pl.when(pl.col(f"new_q0002_{h}").abs() < pl.col(f"ptend_q0002_{h}").abs())
            .then(0)
            .otherwise(pl.col(f"new_q0002_{h}"))
            .alias(f"new_q0002_{h}"),
            pl.when(pl.col(f"new_q0003_{h}").abs() < pl.col(f"ptend_q0003_{h}").abs())
            .then(0)
            .otherwise(pl.col(f"new_q0003_{h}"))
            .alias(f"new_q0003_{h}"),
        ]
    )
    .with_columns(
        [
            (pl.col(f"new_q0002_{h}") + pl.col(f"new_q0003_{h}")).alias(
                f"cloud_water_{h}"
            ),
            (
                pl.col(f"new_q0003_{h}")
                / (pl.col(f"new_q0002_{h}") + pl.col(f"new_q0003_{h}") + 1e-60)
            ).alias("ice_ratio"),
            (
                (
                    (pl.col(f"new_t_{h}") > tmax_fice)
                    & (pl.col(f"new_t_{h}") > tmax_fsnow)
                )
            ).alias("no_ice"),
            (
                (
                    (pl.col(f"new_t_{h}") < tmin_fice)
                    & (pl.col(f"new_t_{h}") < tmin_fsnow)
                )
            ).alias("all_ice"),
        ]
    )
)

cols = [
    "no_ice",
    "all_ice",
    f"ice_ratio",
    f"state_t_{h}",
    f"new_t_{h}",
    f"cloud_water_{h}",
    f"state_q0002_{h}",
    f"ptend_q0002_{h}",
    f"new_q0002_{h}",
    f"state_q0003_{h}",
    f"ptend_q0003_{h}",
    f"new_q0003_{h}",
]

tmp[cols].head()

no_ice,all_ice,ice_ratio,state_t_30,new_t_30,cloud_water_30,state_q0002_30,ptend_q0002_30,new_q0002_30,state_q0003_30,ptend_q0003_30,new_q0003_30
bool,bool,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
False,False,0.0,243.663599,243.637593,0.0,9.0725e-16,-7.560400000000001e-19,0.0,0.0,0.0,0.0
False,False,0.0,238.911192,238.898024,0.0,9.0725e-16,-7.560400000000001e-19,0.0,0.0,0.0,0.0
False,False,0.0,251.639426,251.591888,0.0,3.4508e-10,-2.8757e-13,0.0,8e-06,-6.6868e-09,0.0
False,False,0.0,250.429055,250.381662,0.0,1.314e-09,-1.095e-12,0.0,4e-06,-3.7082e-09,0.0
False,False,1.0,236.938842,236.893842,3.1273e-07,8.5367e-14,-7.1139e-17,0.0,8e-06,-6.7685e-09,3.1273e-07


In [122]:
# 話を単純化するために cloud_water が０のやつは取り除いておく
tmp_filter = tmp.filter(pl.col(f"cloud_water_{h}") != 0)[cols]
tmp_filter.head()

no_ice,all_ice,ice_ratio,state_t_30,new_t_30,cloud_water_30,state_q0002_30,ptend_q0002_30,new_q0002_30,state_q0003_30,ptend_q0003_30,new_q0003_30
bool,bool,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
False,False,1.0,236.938842,236.893842,3.1273e-07,8.5367e-14,-7.1139e-17,0.0,8e-06,-6.7685e-09,3.1273e-07
False,False,1.0,240.631083,240.623996,6.2e-05,6.9637e-09,-5.8031e-12,0.0,6.2e-05,2.7574e-11,6.2e-05
False,False,1.0,249.701027,249.756637,6.1e-05,1.6521e-08,-1.3768e-11,0.0,6.2e-05,-3.7104e-10,6.1e-05
False,False,1.0,250.469308,250.585922,0.000115,6.8749e-08,-5.7291e-11,0.0,0.000115,7.2108e-10,0.000115
False,False,1.0,243.296692,243.272579,2.8e-05,1.9726e-10,-1.6438e-13,0.0,2.2e-05,4.8938e-09,2.8e-05


### q2

In [123]:
tmp_filter.filter(pl.col(f"new_q0002_{h}") == 0)[cols].describe()

statistic,no_ice,all_ice,ice_ratio,state_t_30,new_t_30,cloud_water_30,state_q0002_30,ptend_q0002_30,new_q0002_30,state_q0003_30,ptend_q0003_30,new_q0003_30
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",36966.0,36966.0,36966.0,36966.0,36966.0,36966.0,36966.0,36966.0,36966.0,36966.0,36966.0,36966.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",0.0,0.457312,1.0,234.805604,234.812722,1.8e-05,7.0958e-08,-5.9131e-11,0.0,1.8e-05,5.8918e-10,1.8e-05
"""std""",,,0.0,12.581315,12.601764,3.4e-05,3.2954e-07,2.7461e-10,0.0,3e-05,7.6986e-09,3.4e-05
"""min""",0.0,0.0,1.0,209.798248,209.791489,5.1868e-12,0.0,-7.059e-09,0.0,0.0,-6.0521e-08,5.1868e-12
"""25%""",,,1.0,224.178347,224.164327,5.6302e-07,0.0,-2.6108e-13,0.0,8.0563e-07,-7.5684e-10,5.6302e-07
"""50%""",,,1.0,235.15789,235.149595,3e-06,2.1585e-16,-1.7955000000000001e-19,0.0,4e-06,7.2438e-12,3e-06
"""75%""",,,1.0,247.382118,247.397372,1.9e-05,3.1329e-10,0.0,0.0,1.9e-05,6.5144e-10,1.9e-05
"""max""",0.0,1.0,1.0,254.333341,254.314089,0.000362,8e-06,0.0,0.0,0.000263,1.6334e-07,0.000362


In [124]:
tmp_filter.filter(pl.col(f"new_q0002_{h}") > 0)[cols].describe()

statistic,no_ice,all_ice,ice_ratio,state_t_30,new_t_30,cloud_water_30,state_q0002_30,ptend_q0002_30,new_q0002_30,state_q0003_30,ptend_q0003_30,new_q0003_30
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",2669.0,2669.0,2669.0,2669.0,2669.0,2669.0,2669.0,2669.0,2669.0,2669.0,2669.0,2669.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",0.0,0.0,0.949576,253.863867,254.078158,0.000127,9e-06,-1.7782e-09,7e-06,0.000109,8.7442e-09,0.00012
"""std""",,,0.043149,1.033146,1.02428,7.3e-05,1e-05,2.5785e-09,8e-06,5.8e-05,2.0469e-08,6.8e-05
"""min""",0.0,0.0,0.779197,247.757344,248.77515,2.063e-07,0.0,-1.7245e-08,1.5282e-10,5.9091e-08,-1.0136e-07,2.0323e-07
"""25%""",,,0.92194,253.190186,253.36684,6.6e-05,2e-06,-2.7038e-09,8.5103e-07,6.1e-05,-2.7243e-09,6.3e-05
"""50%""",,,0.958393,253.839074,254.02089,0.000123,5e-06,-1.1156e-09,4e-06,0.000109,5.3241e-09,0.000116
"""75%""",,,0.98752,254.529594,254.734821,0.000182,1.4e-05,-1.9185e-10,1.1e-05,0.000154,1.879e-08,0.000171
"""max""",0.0,0.0,0.999994,256.758412,257.171022,0.000374,6.9e-05,1.1732e-08,5.5e-05,0.000305,1.4292e-07,0.000349


### q3

In [125]:
tmp_filter.filter(pl.col(f"new_q0003_{h}") > 0)[cols].describe()

statistic,no_ice,all_ice,ice_ratio,state_t_30,new_t_30,cloud_water_30,state_q0002_30,ptend_q0002_30,new_q0002_30,state_q0003_30,ptend_q0003_30,new_q0003_30
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",39635.0,39635.0,39635.0,39635.0,39635.0,39635.0,39635.0,39635.0,39635.0,39635.0,39635.0,39635.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",0.0,0.426517,0.996604,236.088977,236.110047,2.6e-05,6.9431e-07,-1.7489e-10,4.8444e-07,2.4e-05,1.1383e-09,2.5e-05
"""std""",,,0.016883,13.058114,13.095498,4.7e-05,3e-06,8.3874e-10,3e-06,4e-05,9.3626e-09,4.5e-05
"""min""",0.0,0.0,0.779197,209.798248,209.791489,5.1868e-12,0.0,-1.7245e-08,0.0,0.0,-1.0136e-07,5.1868e-12
"""25%""",,,1.0,224.989419,224.971749,6.5422e-07,0.0,-2.1968e-12,0.0,9.277e-07,-7.9244e-10,6.5422e-07
"""50%""",,,1.0,236.916377,236.896772,4e-06,4.3608e-15,-1.4236e-18,0.0,5e-06,1.3196e-11,4e-06
"""75%""",,,1.0,249.166501,249.197529,2.7e-05,4.4245e-09,0.0,0.0,2.7e-05,9.0975e-10,2.7e-05
"""max""",0.0,1.0,1.0,256.758412,257.171022,0.000374,6.9e-05,1.1732e-08,5.5e-05,0.000305,1.6334e-07,0.000362


In [126]:
tmp_filter.filter(pl.col(f"new_q0003_{h}") == 0)[cols].describe()

statistic,no_ice,all_ice,ice_ratio,state_t_30,new_t_30,cloud_water_30,state_q0002_30,ptend_q0002_30,new_q0002_30,state_q0003_30,ptend_q0003_30,new_q0003_30
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",,,,,,,,,,,,
"""std""",,,,,,,,,,,,
"""min""",,,,,,,,,,,,
"""25%""",,,,,,,,,,,,
"""50%""",,,,,,,,,,,,
"""75%""",,,,,,,,,,,,
"""max""",,,,,,,,,,,,


In [119]:
max_tmp = 273.28
min_tmp = 253.15

tmp_filter.with_columns(
    ((max_tmp - pl.col("new_t_45")) / (max_tmp - min_tmp)).alias("theory_ratio")
).with_columns((pl.col("new_t_45")).cast(pl.Int32).alias("t")).group_by("t").agg(
    pl.col("ice_ratio").mean(),
    pl.col("ice_ratio").std().alias("std"),
    pl.col("theory_ratio").mean().alias("theory_ratio"),
).sort(
    by=["t"]
).filter(
    (pl.col("ice_ratio") < 1.0) & (pl.col("ice_ratio") > 0)
)

t,ice_ratio,std,theory_ratio
i32,f64,f64,f64
252,0.999999,6e-06,1.031646
253,0.982964,0.014862,0.982343
254,0.93377,0.015099,0.933432
255,0.88805,0.016489,0.8857
256,0.833731,0.019614,0.83288
257,0.784459,0.016209,0.785793
258,0.733061,0.016929,0.734448
259,0.680779,0.017087,0.683872
260,0.632983,0.016232,0.635739
261,0.581707,0.016601,0.582857
