# Create forcing and static data for CREDIT models 

In [None]:
import os
import sys
import glob
import numpy as np
from datetime import datetime, timedelta 

import zarr
import xarray as xr
import netCDF4 as nc

## Forcing dataset

### Import TSI from source

In [None]:
# NetCDF file that provides the 'tsi' field together with its time,
# latitude and longitude arrays.
TOA_forcing_path = '/glade/derecho/scratch/dgagne/credit_scalers/solar_radiation_2024-03-29_0204.nc'

# Pull every array fully into memory while the file is open; the generator
# is consumed by the tuple unpacking, i.e. before the `with` block exits.
with nc.Dataset(TOA_forcing_path, 'r') as ncio:
    TSI, TSI_time, latitude, longitude = (
        ncio[var_name][...]
        for var_name in ('tsi', 'time', 'latitude', 'longitude')
    )

# Normalization: divide the whole field by a fixed constant.
# NOTE(review): the origin of 2540585.74 is not shown in this notebook --
# presumably a statistic of the source data; confirm before reusing it.
TSI = TSI / 2540585.74

### Create datetime reference and xarray.Dataset

In [None]:
# hourly version
# Build a synthetic hourly time axis with one stamp per entry of TSI_time,
# counted forward from the first hour of the reference year.
base = datetime(2000, 1, 1, 0) # using the year 2000 as base values
one_hour = timedelta(hours=1)
date_list = [base + step * one_hour for step in range(len(TSI_time))]
time = np.array(date_list, dtype='datetime64[ns]')

# Wrap the TSI array with its (time, latitude, longitude) coordinates.
# NOTE(review): TSI is divided by a normalization constant before this cell,
# so the 'J m-2' units label may no longer describe the stored values -- confirm.
tsi_da = xr.DataArray(
    data=TSI,
    dims=['time', 'latitude', 'longitude'],
    coords={'time': time, 'latitude': latitude, 'longitude': longitude},
    attrs={'units': 'J m-2'},
)

# Single-variable dataset holding the forcing field.
data_forcing = xr.Dataset(
    {'TSI': tsi_da},
    attrs={'variables': 'Total Solar Irradiance (TSI)'},
)

In [None]:
# Disabled (commented out): writing the hourly `data_forcing` dataset to NetCDF.
# save_name = '/glade/campaign/cisl/aiml/ksha/CREDIT/forcing_norm.nc'
# data_forcing.to_netcdf(save_name, engine='netcdf4')

In [None]:
# 6 hourly version
# Keep every hourly step that falls on a 6-hour boundary of the hourly axis.
base = datetime(2000, 1, 1, 0) # using the year 2000 as base values

# Step through the hourly offsets directly. range(0, n, 6) never exceeds the
# last available hourly offset (n - 1), and -- unlike int(n / 6) -- it does not
# drop the final 6-hour stamp when n is not a multiple of 6. For n divisible
# by 6 the resulting stamps are unchanged.
date_list_6h = [base + timedelta(hours=h) for h in range(0, len(TSI_time), 6)]
time_6h = np.array(date_list_6h, dtype='datetime64').astype(dtype='datetime64[ns]')

# Exact-label selection: every stamp in time_6h must exist on the time axis of
# data_forcing, otherwise .sel raises a KeyError.
data_forcing_6h = data_forcing.sel(time=time_6h)

In [None]:
# Write the 6-hourly forcing dataset to NetCDF using the netCDF4 backend.
save_name = '/glade/campaign/cisl/aiml/ksha/CREDIT/forcing_norm_6h.nc'
data_forcing_6h.to_netcdf(path=save_name, engine='netcdf4')

## Static variables

### Import LSM and Z_GDS4_SFC from source

In [None]:
# static from ksha

# NOTE: despite its name, `latitude_weights` holds the path of the file from
# which the static fields (LSM, Z_GDS4_SFC) and the grid are read below.
latitude_weights = '/glade/u/home/wchapman/MLWPS/DataLoader/LSM_static_variables_ERA5_zhght.nc'

# Read all four arrays fully into memory; the generator is consumed by the
# tuple unpacking, i.e. while the file is still open.
with nc.Dataset(latitude_weights, 'r') as ncio:
    LSM, Z_GDS4_SFC, latitude, longitude = (
        ncio[var_name][...]
        for var_name in ('LSM', 'Z_GDS4_SFC', 'latitude', 'longitude')
    )

# # Z_GDS4_SFC[Z_GDS4_SFC<0] = 0
# # Z_GDS4_SFC[LSM[0, ...]==0] = 0.0

# Min-max rescale: the smallest value maps to 0 and the largest to 2.
z_min = np.min(Z_GDS4_SFC)
z_range = np.max(Z_GDS4_SFC) - z_min
Z_GDS4_SFC = 2 * (Z_GDS4_SFC - z_min) / z_range

# # get from somewhere else
# ds_full = xr.open_zarr(
#     '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/STD_SixHourly_TOTAL_2020-01-01_2020-12-31_staged.zarr')

### Create and save the dataset

In [None]:
# Both static fields live on the same (latitude, longitude) grid, so the
# dimension names and coordinate arrays are defined once and reused.
grid_dims = ['latitude', 'longitude']
grid_coords = {'latitude': latitude, 'longitude': longitude}

# NOTE(review): Z_GDS4_SFC is min-max rescaled before this cell, so the
# 'm**2 s**-2' units label may no longer describe the stored values -- confirm.
z_sfc_da = xr.DataArray(
    data=Z_GDS4_SFC,
    dims=grid_dims,
    coords=grid_coords,
    attrs={'units': 'm**2 s**-2'},
)

# Only the first entry along the leading axis of LSM is kept
# (presumably a length-1 time axis -- verify against the source file).
lsm_da = xr.DataArray(
    data=LSM[0, ...],
    dims=grid_dims,
    coords=grid_coords,
    attrs={'units': 'none'},
)

# Dataset bundling the two static variables.
data_static = xr.Dataset(
    {'Z_GDS4_SFC': z_sfc_da, 'LSM': lsm_da},
    attrs={'variables': 'Geopotential relative to the mean sea level (Z_GDS4_SFC); Land-Sea Mask (LSM)'},
)

In [None]:
# Disabled (commented out): writing `data_static` to static.nc.
# save_name = '/glade/campaign/cisl/aiml/ksha/CREDIT/static.nc'
# data_static.to_netcdf(save_name, engine='netcdf4')

In [None]:
# Write the static dataset to NetCDF using the netCDF4 backend.
save_name = '/glade/campaign/cisl/aiml/ksha/CREDIT/static_norm_old.nc'
data_static.to_netcdf(path=save_name, engine='netcdf4')