In [1]:
# Switch to the Kaggle working directory; relative paths used later
# (e.g. "input/train.parquet") are resolved against it.
%cd /kaggle/working

import os

from hydra import compose, initialize
from omegaconf import OmegaConf

# Compose the Hydra configuration of experiment 016_norm_v5 with the debug override.
# NOTE(review): the `debug=True` override here is independent of the plain
# `debug` variable assigned in the next cell — keep the two in sync by hand.
with initialize(config_path="../experiments/016_norm_v5", version_base=None):
    cfg = compose(
        config_name="config.yaml",
        overrides=["debug=True"],
        return_hydra_config=True,
    )

/kaggle/working


In [2]:
from pathlib import Path

import numpy as np
import polars as pl
import xarray as xr
from tqdm import tqdm

# Run-size switches: debug mode uses small values, a full run uses the large ones.
debug = True
if debug:
    n_sampling = 9000
    iter_sampling = 10
else:
    n_sampling = int(625000 * 0.8)
    iter_sampling = 100

In [4]:
# In debug mode only the first 10000 rows are read; otherwise the whole file is loaded.
if debug:
    df = pl.read_parquet("input/train.parquet", n_rows=10000)
else:
    df = pl.read_parquet("input/train.parquet", n_rows=None)

In [9]:
# Dense NumPy matrix of frame columns 1..556 (column 0 is left out), so
# x_array column k corresponds to frame column k + 1.
x_array = df[:, slice(1, 557)].to_numpy()

## 前処理

In [17]:
# Physical constants (from E3SM_ROOT/share/util/shr_const_mod.F90)
grav = 9.80616  # acceleration of gravity ~ m/s^2
cp = 1.00464e3  # specific heat of dry air   ~ J/kg/K
lv = 2.501e6  # latent heat of evaporation ~ J/kg
lf = 3.337e5  # latent heat of fusion      ~ J/kg
ls = lv + lf  # latent heat of sublimation ~ J/kg
rho_air = (
    101325.0 / (6.02214e26 * 1.38065e-23 / 28.966) / 273.15
)  # density of dry air at STP  ~ kg/m^3
# ~ 1.2923182846924677
# The expression above spells out, with literal values:
# SHR_CONST_PSTD/(SHR_CONST_RDAIR*SHR_CONST_TKFRZ)
# SHR_CONST_RDAIR   = SHR_CONST_RGAS/SHR_CONST_MWDAIR
# SHR_CONST_RGAS    = SHR_CONST_AVOGAD*SHR_CONST_BOLTZ
rho_h20 = 1.0e3  # density of fresh water     ~ kg/m^3

In [15]:
# Per-sample layer pressure thickness derived from the grid's hyai/hybi coefficients.
grid_path = "/kaggle/working/misc/grid_info/ClimSim_low-res_grid-info.nc"
# load_dataset reads the file into memory and closes it again, so the notebook
# does not keep an open handle on the NetCDF file for the kernel's lifetime.
grid_info = xr.load_dataset(grid_path)
hyai = grid_info["hyai"].to_numpy()
hybi = grid_info["hybi"].to_numpy()
p0 = 1e5  # reference pressure (presumably Pa — TODO confirm)
ps = df["state_ps"].to_numpy()
# Pressure at every hyai/hybi level for every sample: hyai * p0 + hybi * ps,
# broadcast to (n_samples, n_levels). Kept under its own name so that
# pressures_array only ever means the per-layer differences.
interface_pressures_array = hyai[None, :] * p0 + hybi[None, :] * ps[:, None]
# Difference between consecutive levels along the last axis -> one value per layer.
pressures_array = np.diff(interface_pressures_array, n=1, axis=-1)
print(f"{pressures_array.shape=}")

pressures_array.shape=(10000, 60)


## 水量

In [14]:
# Total water per level: element-wise sum of feature columns 60-119, 120-179
# and 180-239 (presumably the three moisture tracers — TODO confirm via df.columns).
water_array = np.add(
    np.add(x_array[:, 60:120], x_array[:, 120:180]),
    x_array[:, 180:240],
)
(water_array.shape, water_array[0])

((10000, 60),
 array([1.48456291e-06, 1.47749180e-06, 1.46554004e-06, 1.43009203e-06,
        1.41853247e-06, 1.43263880e-06, 1.43797962e-06, 1.41230028e-06,
        1.41259059e-06, 1.39545808e-06, 1.28483556e-06, 1.20852461e-06,
        1.20696588e-06, 1.21846131e-06, 1.25747249e-06, 1.28216891e-06,
        1.28508931e-06, 1.29570503e-06, 1.86475177e-06, 3.54215221e-06,
        5.64159041e-06, 8.28468727e-06, 1.31591138e-05, 2.12735059e-05,
        3.12911327e-05, 4.37278980e-05, 6.22128744e-05, 9.04670266e-05,
        1.20846887e-04, 1.65968615e-04, 2.26157023e-04, 3.06602214e-04,
        4.27570913e-04, 5.30491570e-04, 6.54893414e-04, 8.21625404e-04,
        9.80661482e-04, 1.14783147e-03, 1.39870680e-03, 1.64666236e-03,
        1.84539966e-03, 2.17196447e-03, 2.42082698e-03, 2.58498546e-03,
        3.26316783e-03, 3.75013378e-03, 4.08554754e-03, 5.04706054e-03,
        6.07494259e-03, 7.01202385e-03, 7.85594443e-03, 8.53700461e-03,
        9.24132139e-03, 9.61908042e-03, 9.77965338

## 各層のエネルギー

In [25]:
# Scale each layer's water content by its pressure thickness, then by lv / grav.
# Evaluation order is kept as ((water * dp) * lv) / grav.
water_energy_array = water_array * pressures_array
water_energy_array = water_energy_array * lv / grav
(water_energy_array.shape, water_energy_array[0])

((10000, 60),
 array([1.70130977e+00, 3.03674031e+00, 5.34659473e+00, 9.10425896e+00,
        1.53607458e+01, 2.54834677e+01, 4.02469944e+01, 5.92852587e+01,
        8.47686486e+01, 1.14669399e+02, 1.39613431e+02, 1.69035652e+02,
        2.12519246e+02, 2.64249079e+02, 3.27868083e+02, 3.91460256e+02,
        4.47655192e+02, 5.03708953e+02, 7.96061286e+02, 1.64275730e+03,
        2.81856449e+03, 4.41567072e+03, 7.39231634e+03, 1.25536137e+04,
        1.88659757e+04, 2.66548885e+04, 3.81967875e+04, 5.61075813e+04,
        7.62334552e+04, 1.07269503e+05, 1.50520850e+05, 2.10585705e+05,
        3.03032020e+05, 3.87377009e+05, 4.91315555e+05, 6.30092918e+05,
        7.62373169e+05, 8.93716988e+05, 1.07454383e+06, 1.22877568e+06,
        1.31974274e+06, 1.47581212e+06, 1.55837683e+06, 1.57874090e+06,
        1.89518166e+06, 2.07089057e+06, 2.13646581e+06, 2.48256386e+06,
        2.79366081e+06, 3.00958920e+06, 3.16342689e+06, 3.26442889e+06,
        3.41071355e+06, 3.48371337e+06, 3.52341002

In [26]:
# Temperature
# Feature columns 0-59 weighted by layer pressure thickness, then by cp / grav
# (same evaluation order as the water term: ((T * dp) * cp) / grav).
temp_energy_array = x_array[:, :60] * pressures_array
temp_energy_array = temp_energy_array * cp / grav
(temp_energy_array.shape, temp_energy_array[0])

((10000, 60),
 array([   98424.15327963,   183489.10444772,   335973.02587484,
          628434.81989231,  1123717.6925473 ,  1899015.79348353,
         2967877.57413114,  4297857.28975142,  5880438.17839143,
         7813373.73109888, 10080108.30199832, 12723610.78389943,
        15702926.50460158, 18896615.85014577, 22066745.17296807,
        25163870.80500036, 28216633.92101603, 31131196.39269888,
        33710731.67466716, 36902827.00420462, 40198508.03270261,
        43785808.17049824, 47228918.24703036, 50798076.23320971,
        53083241.23375994, 54761285.93654535, 56206465.63445565,
        57769391.01603334, 59766750.99575   , 62246340.26997215,
        65143938.6741154 , 68283426.89917834, 71499496.98744181,
        74720619.20324974, 77786869.62700363, 80501681.83963309,
        82519471.98168613, 83522423.42193228, 83163968.17447706,
        81489066.95688194, 78658345.07602952, 75133085.48667696,
        71559243.18731001, 68194609.52837212, 65076691.7682159 ,
        620

In [27]:
# Per-layer sum of the water term and the temperature term computed above.
temp_water_energy_array = np.add(water_energy_array, temp_energy_array)
(temp_water_energy_array.shape, temp_water_energy_array[0])

((10000, 60),
 array([   98425.85458939,   183492.14118803,   335978.37246957,
          628443.92415127,  1123733.05329314,  1899041.2769512 ,
         2967917.82112553,  4297916.57501017,  5880522.94704001,
         7813488.40049822, 10080247.91542928, 12723779.81955175,
        15703139.02384769, 18896880.09922463, 22067073.04105071,
        25164262.26525593, 28217081.5762076 , 31131700.10165233,
        33711527.7359536 , 36904469.7615089 , 40201326.5971901 ,
        43790223.84121997, 47236310.56336755, 50810629.84694294,
        53102107.20947903, 54787940.82506182, 56244662.42196368,
        57825498.59732894, 59842984.45094223, 62353609.77303924,
        65294459.52430611, 68494012.60420623, 71802529.00738843,
        75107996.21250807, 78278185.18177122, 81131774.75783898,
        83281845.15091026, 84416140.41005325, 84238512.00696321,
        82717842.64057511, 79978087.8186072 , 76608897.60980716,
        73117620.01743284, 69773350.42841141, 66971873.42505757,
        641

In [28]:
# Wind
# Feature columns 240-299 (u) and 300-359 (v), each weighted by layer pressure
# thickness and divided by grav. Both components go through the same expression.
u_energy_array, v_energy_array = (
    x_array[:, start : start + 60] * pressures_array / grav for start in (240, 300)
)
(u_energy_array.shape, v_energy_array.shape)

((10000, 60), (10000, 60))

In [29]:
# Wind energy
# Squared horizontal wind speed per level: u**2 + v**2 (no pressure weighting here).
wind_energy_array = np.square(x_array[:, 240:300]) + np.square(x_array[:, 300:360])
(wind_energy_array.shape, wind_energy_array[0])

((10000, 60),
 array([5.79967183e+03, 4.79184995e+03, 3.71398533e+03, 1.80647253e+03,
        1.57353397e+03, 1.26000825e+03, 8.23784736e+02, 4.90113503e+02,
        3.51775206e+02, 2.51855501e+02, 1.24129695e+02, 4.37690797e+01,
        1.51443689e+01, 1.69276235e+00, 3.03117217e+01, 8.64823086e+01,
        2.02305553e+02, 4.45585081e+02, 8.26191340e+02, 1.17216873e+03,
        1.40583133e+03, 1.66428941e+03, 1.84449685e+03, 1.83267444e+03,
        1.71455729e+03, 1.47188561e+03, 1.20464920e+03, 9.54596765e+02,
        7.39858389e+02, 5.78293933e+02, 4.69788212e+02, 3.94269993e+02,
        3.33744520e+02, 2.79261106e+02, 2.29071258e+02, 1.84451499e+02,
        1.47110945e+02, 1.17318440e+02, 9.43302308e+01, 7.72603109e+01,
        6.50419110e+01, 5.68057552e+01, 5.03587943e+01, 4.50503120e+01,
        4.05290625e+01, 3.66164282e+01, 3.27146127e+01, 2.77400810e+01,
        2.33394023e+01, 1.84828804e+01, 1.51957984e+01, 1.07976049e+01,
        8.03793898e+00, 7.36469910e+00, 7.29134697

In [32]:
# Wind speed magnitude per level: square root of the squared-speed array (u**2 + v**2).
wind_vec_array = np.sqrt(wind_energy_array)
wind_vec_array.shape, wind_vec_array[0]

((10000, 60),
 array([76.15557647, 69.22318942, 60.94247555, 42.50261797, 39.66779513,
        35.49659489, 28.7016504 , 22.13850724, 18.7556713 , 15.86995594,
        11.14135069,  6.61582041,  3.89157666,  1.30106201,  5.5056082 ,
         9.29958647, 14.22341566, 21.10888629, 28.74354432, 34.23694973,
        37.49441738, 40.79570333, 42.94760589, 42.80974698, 41.40721302,
        38.36516143, 34.70805673, 30.8965494 , 27.20033803, 24.04774278,
        21.67459831, 19.8562331 , 18.26867593, 16.71110727, 15.1351002 ,
        13.58129224, 12.1289301 , 10.83136371,  9.71237514,  8.78978447,
         8.06485654,  7.53695928,  7.09639305,  6.71195292,  6.36624399,
         6.05115098,  5.71966893,  5.26688532,  4.83108707,  4.29917207,
         3.89817886,  3.28597092,  2.83512592,  2.71379791,  2.70024943,
         2.71078557,  2.73629669,  2.76227234,  2.78029906,  2.8016226 ]))

## 単位が W/m^2

In [6]:
cols = ["pbuf_SOLIN", "pbuf_LHFLX", "pbuf_SHFLX", "cam_in_LWUP"]

# Features to build from these columns:
# - the sum of all four
# - the sum of pbuf_LHFLX, pbuf_SHFLX and cam_in_LWUP
# - the difference between pbuf_SOLIN and the other three
print(df[cols])

# Positions are reported relative to the frame without its first column, i.e.
# they are the column indices to use on x_array. The name list is built once
# instead of re-slicing the frame on every iteration.
feature_columns = df.columns[1:]
for col in cols:
    print(col, feature_columns.index(col))

shape: (10_000, 4)
┌─────────────┬────────────┬────────────┬─────────────┐
│ pbuf_SOLIN  ┆ pbuf_LHFLX ┆ pbuf_SHFLX ┆ cam_in_LWUP │
│ ---         ┆ ---        ┆ ---        ┆ ---         │
│ f64         ┆ f64        ┆ f64        ┆ f64         │
╞═════════════╪════════════╪════════════╪═════════════╡
│ 0.0         ┆ 89.488774  ┆ 5.65951    ┆ 435.869227  │
│ 0.0         ┆ 102.340975 ┆ 10.924796  ┆ 418.813856  │
│ 0.0         ┆ 187.06813  ┆ 6.692277   ┆ 454.118907  │
│ 0.0         ┆ 252.083263 ┆ 34.809143  ┆ 458.833796  │
│ 0.0         ┆ 67.252308  ┆ 9.070964   ┆ 406.082712  │
│ …           ┆ …          ┆ …          ┆ …           │
│ 1222.494478 ┆ 14.081274  ┆ 226.243759 ┆ 535.173337  │
│ 1055.611282 ┆ 128.906815 ┆ 34.335404  ┆ 439.611415  │
│ 908.68778   ┆ 78.248006  ┆ -1.32336   ┆ 444.016211  │
│ 1089.461421 ┆ 52.990573  ┆ 273.916256 ┆ 549.101882  │
│ 915.747971  ┆ 147.217205 ┆ 28.479347  ┆ 474.87686   │
└─────────────┴────────────┴────────────┴─────────────┘
pbuf_SOLIN 361
pbuf_LHFLX 362

In [11]:
# Row-wise sum of x_array columns 361, 362, 363 and 371 (the four W/m^2 columns
# inspected above: pbuf_SOLIN, pbuf_LHFLX, pbuf_SHFLX, cam_in_LWUP).
sum_energy_array = np.sum(x_array[:, [361, 362, 363, 371]], axis=1)
(sum_energy_array.shape, sum_energy_array[0])

((10000,), 531.0175117780348)

In [12]:
# Row-wise sum of x_array columns 362, 363 and 371, i.e. the same columns as the
# full sum but without column 361 (pbuf_SOLIN).
sum_flux_array = np.sum(x_array[:, [362, 363, 371]], axis=1)
(sum_flux_array.shape, sum_flux_array[0])

((10000,), 531.0175117780348)

In [13]:
# Column 361 (pbuf_SOLIN) minus the three-column flux sum; negative wherever
# the flux sum exceeds column 361.
energy_diff_array = np.subtract(x_array[:, 361], sum_flux_array)
(energy_diff_array.shape, energy_diff_array[0])

((10000,), -531.0175117780348)

In [52]:
# Ratio of latent heat flux to sensible heat flux: column 362 / column 363.
# NOTE(review): the Bowen ratio is conventionally sensible / latent, so this
# array holds its reciprocal despite the name — confirm which one is intended.
# Rows where column 363 is exactly 0 produce inf or nan.
bowen_ratio_array = np.divide(x_array[:, 362], x_array[:, 363])
(bowen_ratio_array.shape, bowen_ratio_array[0])

((10000,), 15.812105991111302)

## 単位が N/m^2



In [30]:
# The two N/m^2 columns; the planned feature is their sum (see the note string below).
cols = [
    "pbuf_TAUX",
    "pbuf_TAUY",
]

""" 
作る特徴量
- pbuf_TAUX, pbuf_TAUY の合計
"""
print(df[cols])

# Report where each column sits once the frame's first column is dropped,
# which is the index to use on x_array.
for name in cols:
    position = df[:, 1:].columns.index(name)
    print(name, position)

shape: (10_000, 2)
┌───────────┬───────────┐
│ pbuf_TAUX ┆ pbuf_TAUY │
│ ---       ┆ ---       │
│ f64       ┆ f64       │
╞═══════════╪═══════════╡
│ 0.005498  ┆ -0.012671 │
│ -0.029348 ┆ -0.008927 │
│ 0.192057  ┆ -0.186918 │
│ 0.092754  ┆ -0.083188 │
│ -0.020079 ┆ 0.001456  │
│ …         ┆ …         │
│ -0.03434  ┆ -0.031219 │
│ -0.047314 ┆ -0.057721 │
│ -0.038995 ┆ 0.016687  │
│ -0.084857 ┆ -0.011904 │
│ -0.002867 ┆ 0.02247   │
└───────────┴───────────┘
pbuf_TAUX 364
pbuf_TAUY 365


In [None]:
# Row-wise sum of x_array columns 364 and 365 (pbuf_TAUX + pbuf_TAUY).
# This is a plain signed sum of the two components, not a vector magnitude.
sum_surface_stress_array = np.sum(x_array[:, [364, 365]], axis=1)
(sum_surface_stress_array.shape, sum_surface_stress_array[0])

## other

In [54]:
cols = [
    "pbuf_SOLIN",
    "pbuf_COSZRS",
    "cam_in_LWUP",
    "cam_in_ALDIF",
    "cam_in_ALDIR",
    "cam_in_ASDIF",
    "cam_in_ASDIR",
]

# Features to build from these columns (computed in the cells that follow):
# - net radiative flux
# - global solar irradiance
# - global longwave flux
print(df[cols])

# Positions are reported relative to the frame without its first column, i.e.
# they are the column indices to use on x_array. The name list is built once
# instead of re-slicing the frame on every iteration.
feature_columns = df.columns[1:]
for col in cols:
    print(col, feature_columns.index(col))

shape: (10_000, 7)
┌─────────────┬─────────────┬─────────────┬──────────────┬─────────────┬─────────────┬─────────────┐
│ pbuf_SOLIN  ┆ pbuf_COSZRS ┆ cam_in_LWUP ┆ cam_in_ALDIF ┆ cam_in_ALDI ┆ cam_in_ASDI ┆ cam_in_ASDI │
│ ---         ┆ ---         ┆ ---         ┆ ---          ┆ R           ┆ F           ┆ R           │
│ f64         ┆ f64         ┆ f64         ┆ f64          ┆ ---         ┆ ---         ┆ ---         │
│             ┆             ┆             ┆              ┆ f64         ┆ f64         ┆ f64         │
╞═════════════╪═════════════╪═════════════╪══════════════╪═════════════╪═════════════╪═════════════╡
│ 0.0         ┆ 0.0         ┆ 435.869227  ┆ 1.0          ┆ 1.0         ┆ 1.0         ┆ 1.0         │
│ 0.0         ┆ 0.0         ┆ 418.813856  ┆ 1.0          ┆ 1.0         ┆ 1.0         ┆ 1.0         │
│ 0.0         ┆ 0.0         ┆ 454.118907  ┆ 1.0          ┆ 1.0         ┆ 1.0         ┆ 1.0         │
│ 0.0         ┆ 0.0         ┆ 458.833796  ┆ 1.0          ┆ 1.0         ┆

In [34]:
# Net radiative flux: column 361 (pbuf_SOLIN) times column 366, minus column 371.
# presumably 366 is pbuf_COSZRS and 371 is cam_in_LWUP — TODO confirm via df.columns
net_radiative_flux_array = np.subtract(
    x_array[:, 361] * x_array[:, 366],
    x_array[:, 371],
)
(net_radiative_flux_array.shape, net_radiative_flux_array[0])

((10000,), -435.8692271656608)

In [51]:
# Global solar irradiance
# Column 361 (pbuf_SOLIN) scaled by (1 - column 369) and then by (1 - column 370).
# presumably 369/370 are cam_in_ASDIF / cam_in_ASDIR — TODO confirm via df.columns
solar_factor_369 = 1 - x_array[:, 369]
solar_factor_370 = 1 - x_array[:, 370]
global_solar_irradiance_array = x_array[:, 361] * solar_factor_369 * solar_factor_370
(global_solar_irradiance_array.shape, global_solar_irradiance_array[0])

((10000,), 0.0)

In [55]:
# Global longwave flux
# Column 371 scaled by (1 - column 367) and then by (1 - column 368).
# presumably 371 is cam_in_LWUP and 367/368 are cam_in_ALDIF / cam_in_ALDIR —
# TODO confirm via df.columns.
# NOTE(review): any row where column 367 or 368 equals 1.0 yields exactly 0 here.
longwave_factor_367 = 1 - x_array[:, 367]
longwave_factor_368 = 1 - x_array[:, 368]
global_longwave_flux_array = (
    x_array[:, 371] * longwave_factor_367 * longwave_factor_368
)
(global_longwave_flux_array.shape, global_longwave_flux_array[0])

((10000,), 0.0)