# Interpolate ERA5 pressure level data to 1 deg

This notebook performs conservative interpolation from 0.25 degree to 1.0 degree and prepares the dataset used in the paper.

## Contents
* **Main_file_groups**: upper-air, single-level, and diagnostic variables.
* **Static & physics file**: static variables, required file for running physics-based schemes.
* **Cloud and humidity file group**: ERA5 humidity components (not used in the paper).
* **Bilinear interpolation**: a reference to compare against the conservative interpolation (not used in the paper).

In [1]:
import os
import sys
import yaml
import dask
import zarr
import numpy as np
import xarray as xr
from glob import glob

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import xesmf as xe

## Conservative interpolation

### Main file groups

In [4]:
# Put the local ../libs/ folder at the front of the module search path so its helpers can be imported
sys.path.insert(0, os.path.realpath('../libs/'))
# interp_utils supplies the Grid and ConservativeRegridder classes used by the regridding cells below
import interp_utils as iu

In [5]:
# Absolute path of the YAML configuration, resolved against the current working directory
config_name = os.path.realpath('data_config_6h.yml')

# Parse the configuration; `conf` is the mapping every later cell reads paths and chunk sizes from
with open(config_name, 'r') as stream:
    conf = yaml.safe_load(stream)

year = 1979 # test on one year

In [6]:
# Root folders from the config: 0.25 deg inputs and 1 deg outputs
base_dir_1deg = conf['zarr_opt']['save_loc_1deg']
base_dir = conf['zarr_opt']['save_loc']

# ---- 0.25 deg ERA5 stores: path template followed by the opened dataset ---- #
# Yearly stores receive the year through str.format; the static store has no year placeholder.

# surface variables
zarr_name_surf = base_dir+'surf/ERA5_plevel_6h_surf_{}.zarr'
ds_surf = xr.open_zarr(zarr_name_surf.format(year))

# additional surface variables
zarr_name_surf_extra = base_dir+'surf/ERA5_plevel_6h_surf_extend_{}.zarr'
ds_surf_extra = xr.open_zarr(zarr_name_surf_extra.format(year))

# accumulated variables
zarr_name_accum = base_dir+'accum/ERA5_plevel_6h_accum_{}.zarr'
ds_accum = xr.open_zarr(zarr_name_accum.format(year))

# forcing variables
zarr_name_forcing = base_dir+'forcing/ERA5_plevel_6h_forcing_{}.zarr'
ds_forcing = xr.open_zarr(zarr_name_forcing.format(year))

# upper-air variables
zarr_name_upper = base_dir+'upper_air/ERA5_plevel_6h_upper_air_{}.zarr'
ds_upper = xr.open_zarr(zarr_name_upper.format(year))

# upper-air Q store
zarr_name_upper_Q = base_dir+'upper_air/ERA5_plevel_6h_Q_{}.zarr'
ds_upper_Q = xr.open_zarr(zarr_name_upper_Q.format(year))

# static variables (kept out of the merge; used later for the land-sea mask)
zarr_name_static = base_dir+'static/ERA5_plevel_6h_static.zarr'
ds_static = xr.open_zarr(zarr_name_static)

# Combine every yearly dataset into one and discard SSTK, which is not needed downstream
ds_merge = xr.merge(
    [ds_surf, ds_accum, ds_forcing, ds_upper, ds_upper_Q, ds_surf_extra]
).drop_vars('SSTK')

# ======================================================================================= #
# 0.25 deg to 1 deg interpolation using conservative approach

# Target grid: 1 deg spacing, longitude 0..359 and latitude -90..90 (ascending)
lon_1deg = np.arange(0, 360, 1)
lat_1deg = np.arange(-90, 91, 1)
target_grid = iu.Grid.from_degrees(lon_1deg, lat_1deg)

# Source grid: built from the dataset's own axes, with the latitude values reversed
lon_025deg = ds_merge['longitude'].values
lat_025deg = ds_merge['latitude'].values[::-1]
source_grid = iu.Grid.from_degrees(lon_025deg, lat_025deg)

regridder = iu.ConservativeRegridder(source=source_grid, target=target_grid)

# Keep each horizontal dimension in a single chunk before regridding
ds_merge = ds_merge.chunk({'longitude': -1, 'latitude': -1})
ds_merge_1deg = regridder.regrid_dataset(ds_merge)

# For every variable that has both horizontal dimensions, move 'latitude' and
# 'longitude' to the LAST two positions and keep the other dimensions in their
# existing relative order.
horizontal_dims = ('latitude', 'longitude')
for var in ds_merge_1deg.data_vars:
    current_dims = ds_merge_1deg[var].dims

    # Variables missing either horizontal dimension are left untouched
    if not all(dim in current_dims for dim in horizontal_dims):
        continue

    leading_dims = [dim for dim in current_dims if dim not in horizontal_dims]
    new_order = leading_dims + list(horizontal_dims)
    ds_merge_1deg[var] = ds_merge_1deg[var].transpose(*new_order)

# Attach the target-grid axes (lat_1deg / lon_1deg defined above) as coordinate values
ds_merge_1deg = ds_merge_1deg.assign_coords(latitude=lat_1deg, longitude=lon_1deg)

# flip latitude from -90 --> 90 to 90 --> -90
ds_merge_1deg = ds_merge_1deg.isel({'latitude': slice(None, None, -1)})

# float64 --> float32 (variables of any other dtype are not listed and keep their dtype)
float64_vars = [name for name in ds_merge_1deg if ds_merge_1deg[name].dtype == np.float64]
ds_merge_1deg = ds_merge_1deg.astype(dict.fromkeys(float64_vars, np.float32))

# ======================================================================================= #
# process land-sea mask and sea ice
land_sea_mask = ds_static['land_sea_mask']

# Target 1-degree grid, latitude ordered 90 --> -90 to match ds_merge_1deg after its flip
lon_1deg = np.arange(0, 360, 1)
lat_1deg = np.arange(-90, 91, 1)[::-1]

# Target grid as an xarray Dataset holding only the two axes
ds_out = xr.Dataset({
    'latitude': (['latitude'], lat_1deg),
    'longitude': (['longitude'], lon_1deg),
})

# Nearest-neighbour (source-to-destination) regridder; ds_merge supplies the 0.25 deg source grid
regridder = xe.Regridder(ds_merge, ds_out, 'nearest_s2d')

# Regrid the land-sea mask with it
land_sea_mask_1deg = regridder(land_sea_mask)

# Combine CI and the land-sea mask: broadcast the mask to the shape of the sea-ice field first
sea_ice_cover = ds_merge_1deg['CI']
land_sea_mask_expanded = land_sea_mask_1deg.broadcast_like(sea_ice_cover)

# Where the mask is 0 and sea ice is present, store the negated sea-ice cover;
# everywhere else keep the land-sea mask value.
is_icy_ocean = (land_sea_mask_expanded == 0) & (sea_ice_cover > 0)
land_sea_CI_mask = xr.where(is_icy_ocean, -sea_ice_cover, land_sea_mask_expanded)

# The combined mask replaces the separate CI variable
ds_merge_1deg['land_sea_CI_mask'] = land_sea_CI_mask
ds_merge_1deg = ds_merge_1deg.drop_vars('CI')

# Convert latitude, longitude, and level coordinates to float32
ds_merge_1deg = ds_merge_1deg.assign_coords({
    name: ds_merge_1deg[name].astype(np.float32)
    for name in ('latitude', 'longitude', 'level')
})

# ========================================================================== #
# chunking
varnames = list(ds_merge_1deg.keys())
varname_4D = ['U', 'V', 'T', 'Z', 'Q', 'specific_total_water']

# Variables listed in varname_4D use the 4-D chunk sizes from the config; all others the 3-D ones
for var in varnames:
    chunk_key = 'chunk_size_4d_1deg' if var in varname_4D else 'chunk_size_3d_1deg'
    ds_merge_1deg[var] = ds_merge_1deg[var].chunk(conf['zarr_opt'][chunk_key])

# zarr encodings: chunk tuples listed in (time, [level,] latitude, longitude) order
chunk_size_3d = dict(chunks=tuple(
    conf['zarr_opt']['chunk_size_3d_1deg'][dim]
    for dim in ('time', 'latitude', 'longitude')
))

chunk_size_4d = dict(chunks=tuple(
    conf['zarr_opt']['chunk_size_4d_1deg'][dim]
    for dim in ('time', 'level', 'latitude', 'longitude')
))

compress = zarr.Blosc(cname='zstd', clevel=1, shuffle=zarr.Blosc.SHUFFLE, blocksize=0)

# One encoding entry per variable: shared compressor plus the chunk tuple matching its rank
dict_encoding = {
    var: {'compressor': compress, **(chunk_size_4d if var in varname_4D else chunk_size_3d)}
    for var in varnames
}

# ========================================================================== #
# save

# Output store for this year's merged 1 deg dataset, under the 'all_in_one' folder
save_name = base_dir_1deg + 'all_in_one/ERA5_plevel_1deg_6h_{}_conserve.zarr'.format(year)
# The write is left commented out; uncomment to (over)write the store (mode='w') with dict_encoding
# ds_merge_1deg.to_zarr(save_name, mode='w', consolidated=True, compute=True, encoding=dict_encoding)

In [8]:
# Display the resolved output path as the cell result
save_name

'/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_1deg/all_in_one/ERA5_plevel_1deg_6h_1979_conserve.zarr'

### Static & physics file

In [29]:
# Reload the YAML configuration so this section can be run on its own
config_name = os.path.realpath('data_config_6h.yml')

with open(config_name, 'r') as stream:
    conf = yaml.safe_load(stream)

In [30]:
# Output root for the 1 deg products
base_dir_1deg = conf['zarr_opt']['save_loc_1deg']
# NOTE: base_dir is rebound here to the 'static/' folder under conf['ARCO']['save_loc'];
# the main-file-group cell bound it to conf['zarr_opt']['save_loc'] instead.
base_dir = conf['ARCO']['save_loc'] + 'static/' 
# Static store name: <ARCO static folder><ARCO prefix>_static.zarr
static_name = base_dir + conf['ARCO']['prefix'] + '_static.zarr'

In [31]:
# Open the static-variable store located by static_name
ds_static = xr.open_zarr(static_name)

In [37]:
# ================================================================================== #
# geopotential at surface

# Target grid: 1 deg spacing, longitude 0..359 and latitude -90..90 (ascending)
lon_1deg = np.arange(0, 360, 1)
lat_1deg = np.arange(-90, 91, 1)
target_grid = iu.Grid.from_degrees(lon_1deg, lat_1deg)

# Source grid: built from the static dataset's own axes, with the latitude values reversed
lon_025deg = ds_static['longitude'].values
lat_025deg = ds_static['latitude'].values[::-1]
source_grid = iu.Grid.from_degrees(lon_025deg, lat_025deg)

regridder = iu.ConservativeRegridder(source=source_grid, target=target_grid)

# Keep each horizontal dimension in a single chunk before regridding
ds_static = ds_static.chunk({'longitude': -1, 'latitude': -1})
ds_static_1deg = regridder.regrid_dataset(ds_static)

# For every variable that has both horizontal dimensions, move 'latitude' and
# 'longitude' to the LAST two positions and keep the other dimensions in their
# existing relative order.
horizontal_dims = ('latitude', 'longitude')
for var in ds_static_1deg.data_vars:
    current_dims = ds_static_1deg[var].dims

    # Variables missing either horizontal dimension are left untouched
    if not all(dim in current_dims for dim in horizontal_dims):
        continue

    leading_dims = [dim for dim in current_dims if dim not in horizontal_dims]
    new_order = leading_dims + list(horizontal_dims)
    ds_static_1deg[var] = ds_static_1deg[var].transpose(*new_order)

# Attach the target-grid axes (lat_1deg / lon_1deg defined above) as coordinate values
ds_static_1deg = ds_static_1deg.assign_coords(latitude=lat_1deg, longitude=lon_1deg)

# flip latitude from -90 --> 90 to 90 --> -90
ds_static_1deg = ds_static_1deg.isel({'latitude': slice(None, None, -1)})

# ================================================================================== #
# normalized geopotential at surface

# Standardize 'geopotential_at_surface' with its own mean and standard deviation.
# skipna=False means a NaN anywhere in the field propagates into both statistics.
z_sfc = ds_static_1deg['geopotential_at_surface']
mean_val = float(z_sfc.mean(skipna=False))
std_val = float(z_sfc.std(skipna=False))

z_norm = (z_sfc - mean_val)/std_val
ds_static_1deg['z_norm'] = z_norm

# ======================================================================================= #
# process land-sea mask and sea ice
land_sea_mask = ds_static['land_sea_mask']

# Target 1-degree grid, latitude ordered 90 --> -90 to match ds_static_1deg after its flip
lon_1deg = np.arange(0, 360, 1)
lat_1deg = np.arange(-90, 91, 1)[::-1]

# Target grid as an xarray Dataset holding only the two axes
ds_out = xr.Dataset({
    'latitude': (['latitude'], lat_1deg),
    'longitude': (['longitude'], lon_1deg),
})

# Nearest-neighbour (source-to-destination) regridder, shared by both fields below
regridder = xe.Regridder(ds_static, ds_out, 'nearest_s2d')

# Land-sea mask: the nearest-neighbour result replaces whatever the dataset held under this name
land_sea_mask_1deg = regridder(land_sea_mask)
ds_static_1deg['land_sea_mask'] = land_sea_mask_1deg

# ======================================================================================= #
# process soil type

# Soil type goes through the same nearest-neighbour regridder
soil_type = ds_static['soil_type']
soil_type_1deg = regridder(soil_type)
ds_static_1deg['soil_type'] = soil_type_1deg

# ================================================================================== #
# physics variables

# lon1d and lat1d are 1D
lon1d = ds_static_1deg['longitude'].values
lat1d = ds_static_1deg['latitude'].values

# Generate 2D latitude and longitude arrays; with meshgrid's default indexing both
# outputs have shape (len(lat1d), len(lon1d))
lon2d, lat2d = np.meshgrid(lon1d, lat1d)

# Define pressure levels (p_level) as a new dimension
# NOTE(review): values look like Pa (100 .. 100000) -- confirm the unit
p_level = np.array([   100.,    200.,    300.,    500.,    700.,   1000.,   2000.,
                      3000.,   5000.,   7000.,  10000.,  12500.,  15000.,  17500.,
                     20000.,  22500.,  25000.,  30000.,  35000.,  40000.,  45000.,
                     50000.,  55000.,  60000.,  65000.,  70000.,  75000.,  77500.,
                     80000.,  82500.,  85000.,  87500.,  90000.,  92500.,  95000.,
                     97500., 100000.])

# Add 2D longitude and latitude arrays to the dataset
ds_static_1deg['lon2d'] = xr.DataArray(lon2d, dims=('latitude', 'longitude'))
ds_static_1deg['lat2d'] = xr.DataArray(lat2d, dims=('latitude', 'longitude'))

# Open the 1979 all-in-one 1 deg store; its coordinates are copied onto the static dataset below
ds_example = xr.open_zarr(base_dir_1deg+'all_in_one/ERA5_plevel_1deg_6h_1979_conserve.zarr')

# Add pressure levels as a new variable with its own dimension.
# 'p_level' is the LAST data variable inserted; the chunking step later in this cell
# depends on that ordering.
ds_static_1deg['p_level'] = xr.DataArray(p_level, dims=('level'))
ds_static_1deg = ds_static_1deg.assign_coords(level=('level', p_level))

# ================================================================================== #
# cast every data variable to float32
ds_static_1deg = ds_static_1deg.astype({var: np.float32 for var in ds_static_1deg})

# Replace latitude, longitude, and level with the coordinates of the all-in-one store so
# both files share the same axes. This overrides the p_level-based 'level' coordinate
# assigned just above.
ds_static_1deg = ds_static_1deg.assign_coords({
    'latitude': ds_example['latitude'],
    'longitude': ds_example['longitude'],
    'level': ds_example['level']
})

# ================================================================================== #
# chunk

# Only fields that have both horizontal dimensions receive the (latitude, longitude)
# chunking and encoding below. Selecting by dimensions excludes the 1-D 'p_level'
# variable explicitly, instead of assuming it is the last variable in the dataset.
varnames = [
    var for var in ds_static_1deg.data_vars
    if {'latitude', 'longitude'} <= set(ds_static_1deg[var].dims)
]

# One chunk covering the whole 181 x 360 grid
for var in varnames:
    ds_static_1deg[var] = ds_static_1deg[var].chunk({'latitude': 181, 'longitude': 360})

# zarr encodings
dict_encoding = {}

# The 2-D chunk tuple reuses the horizontal sizes of the 3-D chunk configuration
chunk_size_2d = dict(chunks=(conf['zarr_opt']['chunk_size_3d_1deg']['latitude'],
                             conf['zarr_opt']['chunk_size_3d_1deg']['longitude']))

compress = zarr.Blosc(cname='zstd', clevel=1, shuffle=zarr.Blosc.SHUFFLE, blocksize=0)

for var in varnames:
    dict_encoding[var] = {'compressor': compress, **chunk_size_2d}

In [44]:
# Output store for the conservatively regridded static/physics dataset
save_name = base_dir_1deg + 'static/ERA5_plevel_1deg_6h_conserve_static.zarr'
# The write is left commented out; uncomment to (over)write the store (mode='w') with dict_encoding
# ds_static_1deg.to_zarr(save_name, mode='w', consolidated=True, compute=True, encoding=dict_encoding)

<xarray.backends.zarr.ZarrStore at 0x14fd5d7a1150>

### Cloud and humidity file group

In [5]:
# Reload the YAML configuration so this section can be run on its own
config_name = os.path.realpath('data_config_6h.yml')

with open(config_name, 'r') as stream:
    conf = yaml.safe_load(stream)

year = 1979 # test on one year

In [6]:
# Root folders from the config: 0.25 deg inputs (base_dir) and 1 deg outputs (base_dir_1deg)
base_dir = conf['zarr_opt']['save_loc']
base_dir_1deg = conf['zarr_opt']['save_loc_1deg']

In [10]:
# Open the 0.25 deg cloud store for the selected year
zarr_name_cloud = base_dir+'cloud/ERA5_plevel_6h_cloud_{}.zarr'
ds_cloud = xr.open_zarr(zarr_name_cloud.format(year))

# ======================================================================================= #
# 0.25 deg to 1 deg interpolation using conservative approach

# Target grid: 1 deg spacing, longitude 0..359 and latitude -90..90 (ascending)
lon_1deg = np.arange(0, 360, 1)
lat_1deg = np.arange(-90, 91, 1)
target_grid = iu.Grid.from_degrees(lon_1deg, lat_1deg)

# Source grid: built from the cloud dataset's own axes, with the latitude values reversed
lon_025deg = ds_cloud['longitude'].values
lat_025deg = ds_cloud['latitude'].values[::-1]
source_grid = iu.Grid.from_degrees(lon_025deg, lat_025deg)

regridder = iu.ConservativeRegridder(source=source_grid, target=target_grid)

# Keep each horizontal dimension in a single chunk before regridding
ds_cloud = ds_cloud.chunk({'longitude': -1, 'latitude': -1})
ds_cloud_1deg = regridder.regrid_dataset(ds_cloud)

# For every variable that has both horizontal dimensions, move 'latitude' and
# 'longitude' to the LAST two positions and keep the other dimensions in their
# existing relative order.
horizontal_dims = ('latitude', 'longitude')
for var in ds_cloud_1deg.data_vars:
    current_dims = ds_cloud_1deg[var].dims

    # Variables missing either horizontal dimension are left untouched
    if not all(dim in current_dims for dim in horizontal_dims):
        continue

    leading_dims = [dim for dim in current_dims if dim not in horizontal_dims]
    new_order = leading_dims + list(horizontal_dims)
    ds_cloud_1deg[var] = ds_cloud_1deg[var].transpose(*new_order)

# Attach the target-grid axes (lat_1deg / lon_1deg defined above) as coordinate values
ds_cloud_1deg = ds_cloud_1deg.assign_coords(latitude=lat_1deg, longitude=lon_1deg)

# flip latitude from -90 --> 90 to 90 --> -90
ds_cloud_1deg = ds_cloud_1deg.isel({'latitude': slice(None, None, -1)})

# float64 --> float32 (variables of any other dtype are not listed and keep their dtype)
float64_vars = [name for name in ds_cloud_1deg if ds_cloud_1deg[name].dtype == np.float64]
ds_cloud_1deg = ds_cloud_1deg.astype(dict.fromkeys(float64_vars, np.float32))

# Convert latitude, longitude, and level coordinates to float32
ds_cloud_1deg = ds_cloud_1deg.assign_coords({
    name: ds_cloud_1deg[name].astype(np.float32)
    for name in ('latitude', 'longitude', 'level')
})

# ========================================================================== #
# chunking
varnames = list(ds_cloud_1deg.keys())

# Every variable in this group is chunked with the 4-D chunk sizes from the config
for var in varnames:
    ds_cloud_1deg[var] = ds_cloud_1deg[var].chunk(conf['zarr_opt']['chunk_size_4d_1deg'])

# zarr encodings: chunk tuple listed in (time, level, latitude, longitude) order
chunk_size_4d = dict(chunks=tuple(
    conf['zarr_opt']['chunk_size_4d_1deg'][dim]
    for dim in ('time', 'level', 'latitude', 'longitude')
))

compress = zarr.Blosc(cname='zstd', clevel=1, shuffle=zarr.Blosc.SHUFFLE, blocksize=0)

# One encoding entry per variable: shared compressor plus the 4-D chunk tuple
dict_encoding = {var: {'compressor': compress, **chunk_size_4d} for var in varnames}

# ========================================================================== #
# save
# Only the output path is built here; this cell does not write the store
save_name = base_dir_1deg + 'cloud/ERA5_plevel_1deg_6h_cloud_{}_conserve.zarr'.format(year)


### Compare against bilinear interp

In [12]:
# Variable extracted from both 1 deg products in the comparison cell below
varname = 'land_sea_CI_mask'

In [13]:
# Open the conservative and the bilinear 1 deg products for 1979
ds_conserve_1deg = xr.open_zarr(base_dir_1deg + 'all_in_one/ERA5_plevel_1deg_6h_1979_conserve.zarr')
ds_bilinear_1deg = xr.open_zarr(base_dir_1deg + 'all_in_one/bilinear/ERA5_plevel_1deg_6h_1979_bilinear.zarr')

# Take the same time index from each product. Each name is read from the store it is
# named after, so the plots are labelled with the right method.
# (The 'blinear_np' spelling is kept because the plotting cells refer to it.)
blinear_np = ds_bilinear_1deg[varname].isel(time=999)
conserve_np = ds_conserve_1deg[varname].isel(time=999)

In [24]:
# plt.pcolormesh(blinear_np, cmap=plt.cm.jet)
# plt.colorbar()

In [25]:
# plt.pcolormesh(conserve_np, cmap=plt.cm.jet)
# plt.colorbar()

## Bilinear interpolation (old)

In [None]:
import xesmf as xe

In [5]:
# Reload the YAML configuration so this section can be run on its own
config_name = os.path.realpath('data_config_6h.yml')

with open(config_name, 'r') as stream:
    conf = yaml.safe_load(stream)

year = 1979 # test on one year

# Root folders from the config: 0.25 deg inputs and 1 deg outputs
base_dir = conf['zarr_opt']['save_loc']
base_dir_1deg = conf['zarr_opt']['save_loc_1deg']

# load all 0.25 deg ERA5 data
zarr_name_surf = base_dir+'surf/ERA5_plevel_6h_surf_{}.zarr'
zarr_name_surf_extra = base_dir+'surf/ERA5_plevel_6h_surf_extend_{}.zarr'
zarr_name_accum = base_dir+'accum/ERA5_plevel_6h_accum_{}.zarr'
zarr_name_forcing = base_dir+'forcing/ERA5_plevel_6h_forcing_{}.zarr'
zarr_name_upper = base_dir+'upper_air/ERA5_plevel_6h_upper_air_{}.zarr'
zarr_name_upper_Q = base_dir+'upper_air/ERA5_plevel_6h_Q_{}.zarr'

ds_surf = xr.open_zarr(zarr_name_surf.format(year))
ds_surf_extra = xr.open_zarr(zarr_name_surf_extra.format(year))
ds_accum = xr.open_zarr(zarr_name_accum.format(year))
ds_forcing = xr.open_zarr(zarr_name_forcing.format(year))
ds_upper = xr.open_zarr(zarr_name_upper.format(year))
ds_upper_Q = xr.open_zarr(zarr_name_upper_Q.format(year))

# Build ds_merge inside this cell so the regridding below does not rely on a variable
# left over from another cell. The merge order and the SSTK drop mirror the
# conservative-interpolation section.
# NOTE(review): the post-processing below reads 'land_sea_CI_mask' from the regridded
# result -- confirm that one of these stores provides it.
ds_merge = xr.merge([ds_surf, ds_accum, ds_forcing, ds_upper, ds_upper_Q, ds_surf_extra])
ds_merge = ds_merge.drop_vars('SSTK')

# ========================================================================== #
# Interpolate to 1-degree resolution using xESMF

# Target 1-degree grid, latitude ordered 90 --> -90
lon_1deg = np.arange(0, 360, 1)
lat_1deg = np.arange(-90, 91, 1)[::-1]

# Target grid as an xarray Dataset holding only the two axes
ds_out = xr.Dataset({
    'latitude': (['latitude'], lat_1deg),
    'longitude': (['longitude'], lon_1deg),
})

# Bilinear regridder from the ds_merge grid to the 1 deg grid, applied to every variable
regridder = xe.Regridder(ds_merge, ds_out, 'bilinear')
ds_merge_1deg = regridder(ds_merge)

# Post-process 'land_sea_CI_mask' to ensure ocean=0, land=1, sea-ice=-1~0 after interpolation
land_sea_CI_mask_interp = ds_merge_1deg['land_sea_CI_mask']

# Classification of the interpolated values:
# - value >= 0.5   --> 1 (land)
# - value <= -0.01 --> kept unchanged (sea-ice)
# - anything else  --> 0 (ocean)
is_land = land_sea_CI_mask_interp >= 0.5
is_sea_ice = land_sea_CI_mask_interp <= -0.01
land_sea_CI_mask_interp = xr.where(
    is_land,
    1,
    xr.where(is_sea_ice, land_sea_CI_mask_interp, 0),
)

# Write the cleaned mask back into the dataset
ds_merge_1deg['land_sea_CI_mask'] = land_sea_CI_mask_interp

# Output path of the bilinear product; the write itself is left commented out
save_name = base_dir_1deg + 'all_in_one/ERA5_plevel_1deg_6h_{}.zarr'.format(year)
# ds_merge_1deg.to_zarr(save_name, mode='w', consolidated=True, compute=True, encoding=dict_encoding)

### Prepare static file

In [1]:
import os
import sys
import yaml
import dask
import zarr
import xesmf as xe
import numpy as np
import xarray as xr
from glob import glob

In [7]:
# Reload the YAML configuration so this section can be run on its own
config_name = os.path.realpath('data_config_6h.yml')

with open(config_name, 'r') as stream:
    conf = yaml.safe_load(stream)

In [8]:
# Output root for the 1 deg products
base_dir_1deg = conf['zarr_opt']['save_loc_1deg']
# base_dir points at the 'static/' folder under conf['ARCO']['save_loc'] in this section
base_dir = conf['ARCO']['save_loc'] + 'static/' 
# Static store name: <ARCO static folder><ARCO prefix>_static.zarr
static_name = base_dir + conf['ARCO']['prefix'] + '_static.zarr'

# Open the static-variable store
ds_static = xr.open_zarr(static_name)

In [16]:
# Define the target 1-degree grid, latitude ordered 90 --> -90
lon_1deg = np.arange(0, 360, 1)
lat_1deg = np.arange(-90, 91, 1)[::-1]

# Create target grid as an xarray Dataset
ds_out = xr.Dataset(
    {
        'latitude': (['latitude'], lat_1deg),
        'longitude': (['longitude'], lon_1deg)
    }
)

# Create the regridder object for bilinear interpolation
regridder = xe.Regridder(ds_static, ds_out, 'bilinear')

# Apply the regridding to interpolate all variables
ds_static_1deg = regridder(ds_static)

# land mask interp: threshold at 0.5 so the interpolated mask is strictly 0/1 again,
# then store it as float32
land_sea_mask_interp = ds_static_1deg['land_sea_mask']
land_sea_mask_interp = xr.where(land_sea_mask_interp >= 0.5, 1, 0)
ds_static_1deg['land_sea_mask'] = land_sea_mask_interp.astype('float32')

# normalize 'geopotential_at_surface' with its own mean and standard deviation
mean_val = float(ds_static_1deg['geopotential_at_surface'].mean(skipna=False))
std_val = float(ds_static_1deg['geopotential_at_surface'].std(skipna=False))
z_norm = (ds_static_1deg['geopotential_at_surface'] - mean_val)/std_val

# Store the normalized field in the dataset. Without this assignment z_norm is computed
# and then discarded, although the physics-file cell expects to find a 'z_norm' variable
# in the saved static file.
ds_static_1deg['z_norm'] = z_norm

varnames = list(ds_static_1deg.keys())

# One chunk covering the whole 181 x 360 grid
for var in varnames:
    ds_static_1deg[var] = ds_static_1deg[var].chunk({'latitude': 181, 'longitude': 360})

# zarr encodings
dict_encoding = {}

# The 2-D chunk tuple reuses the horizontal sizes of the 3-D chunk configuration
chunk_size_2d = dict(chunks=(conf['zarr_opt']['chunk_size_3d_1deg']['latitude'],
                             conf['zarr_opt']['chunk_size_3d_1deg']['longitude']))

compress = zarr.Blosc(cname='zstd', clevel=1, shuffle=zarr.Blosc.SHUFFLE, blocksize=0)

for var in varnames:
    dict_encoding[var] = {'compressor': compress, **chunk_size_2d}

In [53]:
# Output store for the bilinear static dataset. The name ends in '.zarr' so that it matches
# the 'ERA5_plevel_1deg_6h_static.zarr' store opened by the physics-file cell.
save_name = base_dir_1deg + 'static/ERA5_plevel_1deg_6h_static.zarr'
# The write is left commented out; uncomment to (over)write the store (mode='w') with dict_encoding
#ds_static_1deg.to_zarr(save_name, mode='w', consolidated=True, compute=True, encoding=dict_encoding)

<xarray.backends.zarr.ZarrStore at 0x15103e3ed310>

### Prepare physics file

In [9]:
# Open the 1 deg static store as the starting point of the physics file. The path is
# built from the configured 1 deg output root instead of a user-specific absolute path.
ds_phy = xr.open_zarr(base_dir_1deg + 'static/ERA5_plevel_1deg_6h_static.zarr')

In [10]:
# 1-D coordinate axes read from the static dataset
lat1d = ds_phy['latitude'].values
lon1d = ds_phy['longitude'].values

# 2-D coordinate fields; with meshgrid's default indexing both outputs have
# shape (len(lat1d), len(lon1d))
lon2d, lat2d = np.meshgrid(lon1d, lat1d)

# Pressure levels that become the 'level' dimension
# NOTE(review): values look like Pa (100 .. 100000) -- confirm the unit
p_level = np.array([
    100., 200., 300., 500., 700., 1000., 2000., 3000., 5000., 7000.,
    10000., 12500., 15000., 17500., 20000., 22500., 25000.,
    30000., 35000., 40000., 45000., 50000., 55000., 60000., 65000., 70000., 75000.,
    77500., 80000., 82500., 85000., 87500., 90000., 92500., 95000., 97500., 100000.,
])

# Store the 2-D longitude / latitude fields on the horizontal grid
grid_dims = ('latitude', 'longitude')
ds_phy['lon2d'] = xr.DataArray(lon2d, dims=grid_dims)
ds_phy['lat2d'] = xr.DataArray(lat2d, dims=grid_dims)

# Store the pressure levels as a variable along 'level' ...
ds_phy['p_level'] = xr.DataArray(p_level, dims='level')

# ... and as the 'level' coordinate, so other variables can be defined along it
ds_phy = ds_phy.assign_coords(level=('level', p_level))

In [13]:
# Remove 'z_norm' and 'land_sea_mask' from the physics dataset
ds_phy = ds_phy.drop_vars(['z_norm', 'land_sea_mask'])

In [16]:
# ds_phy.to_zarr('/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_1deg/static/ERA5_plevel_1deg_6h_physics.zarr')

In [17]:
# ds_test = xr.open_zarr('/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_1deg/static/ERA5_plevel_1deg_6h_physics.zarr')
# ds_test