# Pre-process ERA5 model level data for CREDIT

In [None]:
import numpy as np
import xarray as xr
import metview as mv

In [30]:
import matplotlib.pyplot as plt
%matplotlib inline

How to install metview

```
conda install metview-python -c conda-forge
pip install metview
mamba install cfgrib
```

* ERA5 / IFS 137 level coefs: https://confluence.ecmwf.int/display/UDOC/L137+model+level+definitions
* Compute pressure on model levels: https://confluence.ecmwf.int/display/CKB/ERA5%3A+compute+pressure+and+geopotential+on+model+levels%2C+geopotential+height+and+geometric+height
* Model level data pre-processing with Metview: https://github.com/google-research/arco-era5/blob/main/docs/1-Model-Levels-Walkthrough.ipynb
* Convert pressure level data to model level: https://github.com/google-research/arco-era5/blob/main/docs/moisture_dataset.py

In [2]:
# Hybrid-coordinate "a" coefficients for the ERA5 / IFS 137-level vertical grid
# (see the L137 model level definitions linked in the reference list above).
# NOTE(review): 137 values are listed and the first one is 2.000365, so this
# presumably covers half-levels n=1..137 (in Pa) with the n=0 entry omitted --
# TODO confirm against the ECMWF table before computing pressure on model levels.
coef_a = np.array([
    2.000365, 3.102241, 4.666084, 6.827977, 9.746966, 13.605424, 18.608931, 
    24.985718, 32.98571, 42.879242, 54.955463, 69.520576, 86.895882, 
    107.415741, 131.425507, 159.279404, 191.338562, 227.968948, 269.539581, 
    316.420746, 368.982361, 427.592499, 492.616028, 564.413452, 643.339905, 
    729.744141, 823.967834, 926.34491, 1037.201172, 1156.853638, 1285.610352, 
    1423.770142, 1571.622925, 1729.448975, 1897.519287, 2076.095947, 
    2265.431641, 2465.770508, 2677.348145, 2900.391357, 3135.119385, 
    3381.743652, 3640.468262, 3911.490479, 4194.930664, 4490.817383, 
    4799.149414, 5119.89502, 5452.990723, 5798.344727, 6156.074219, 
    6526.946777, 6911.870605, 7311.869141, 7727.412109, 8159.354004, 
    8608.525391, 9076.400391, 9562.682617, 10065.97852, 10584.63184, 
    11116.66211, 11660.06738, 12211.54785, 12766.87305, 13324.66895, 
    13881.33106, 14432.13965, 14975.61523, 15508.25684, 16026.11523, 
    16527.32227, 17008.78906, 17467.61328, 17901.62109, 18308.43359, 
    18685.71875, 19031.28906, 19343.51172, 19620.04297, 19859.39063, 
    20059.93164, 20219.66406, 20337.86328, 20412.30859, 20442.07813, 
    20425.71875, 20361.81641, 20249.51172, 20087.08594, 19874.02539, 
    19608.57227, 19290.22656, 18917.46094, 18489.70703, 18006.92578, 
    17471.83984, 16888.6875, 16262.04688, 15596.69531, 14898.45313, 
    14173.32422, 13427.76953, 12668.25781, 11901.33984, 11133.30469, 
    10370.17578, 9617.515625, 8880.453125, 8163.375, 7470.34375, 
    6804.421875, 6168.53125, 5564.382813, 4993.796875, 4457.375, 
    3955.960938, 3489.234375, 3057.265625, 2659.140625, 2294.242188, 
    1961.5, 1659.476563, 1387.546875, 1143.25, 926.507813, 734.992188, 
    568.0625, 424.414063, 302.476563, 202.484375, 122.101563, 62.78125, 
    22.835938, 3.757813, 0, 0
])

In [3]:
# Hybrid-coordinate "b" coefficients for the same vertical grid as coef_a;
# the values run from 0 at the start of the array to 1 at the end.
# NOTE(review): presumably used element-for-element with coef_a
# (p = a + b * surface_pressure). The long runs of zeros make a dropped or
# duplicated entry easy to miss, so verify len(coef_b) == len(coef_a) and that
# the first non-zero entry sits on the intended level -- TODO confirm.
coef_b = np.array([
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0.000007, 0.000024, 0.000059, 0.000112, 0.000199, 0.00034, 0.000562, 
    0.00089, 0.001353, 0.001992, 0.002857, 0.003971, 0.005378, 0.007133, 
    0.009261, 0.011806, 0.014816, 0.018318, 0.022355, 0.026964, 0.032176, 
    0.038026, 0.044548, 0.051773, 0.059728, 0.068448, 0.077958, 0.088286, 
    0.099462, 0.111505, 0.124448, 0.138313, 0.153125, 0.16891, 0.185689, 
    0.203491, 0.222333, 0.242244, 0.263242, 0.285354, 0.308598, 0.332939, 
    0.358254, 0.384363, 0.411125, 0.438391, 0.466003, 0.4938, 0.521619, 
    0.549301, 0.576692, 0.603648, 0.630036, 0.655736, 0.680643, 0.704669, 
    0.727739, 0.749797, 0.770798, 0.790717, 0.809536, 0.827256, 0.843881, 
    0.859432, 0.873929, 0.887408, 0.8999, 0.911448, 0.922096, 0.931881, 
    0.94086, 0.949064, 0.95655, 0.963352, 0.969513, 0.975078, 0.980072, 
    0.984542, 0.9885, 0.991984, 0.995003, 0.99763, 1
])

In [4]:
def attribute_fix(ds):
    """Strip name-based GRIB attributes so ecCodes relies on the paramId.

    Needed to work around a low-level bug in ecCodes: shortNames are
    sometimes overloaded in ecCodes's tables, which makes its string
    matching ambiguous. Removing the name-based attributes forces ecCodes
    to use the paramId, which is a consistent source of truth.

    Parameters
    ----------
    ds : mapping of variables (e.g. an ``xarray.Dataset``)
        Iterating ``ds`` must yield variable names, and ``ds[name].attrs``
        must be that variable's mutable attribute dict.

    Returns
    -------
    The same ``ds`` object that was passed in. The attributes are removed
    in place; no copy is made, so the caller's dataset is modified.
    """
    ambiguous_keys = ('GRIB_cfName', 'GRIB_cfVarName', 'GRIB_shortName')
    for var in ds:
        attrs = ds[var].attrs
        for key in ambiguous_keys:
            # Default of None: a variable lacking the attribute is skipped
            # silently instead of raising KeyError.
            attrs.pop(key, None)
    return ds

In [5]:
# Scratch notes: bare string literals that assign nothing; the cell has no
# effect other than the final literal being the cell's value.
# 'model-level-wind.zarr-v2/' matches the store name in the commented-out wind
# cell further down; the other names are presumably sibling stores in the same
# bucket -- TODO confirm. None of them is the 'single-level-surface.zarr-v2/'
# store that this notebook actually opens.
'model-level-wind.zarr-v2/'
'single-level-forecast.zarr-v2/'
'single-level-reanalysis.zarr-v2/'

'single-level-reanalysis.zarr-v2/'

In [6]:
# era5_moisture = xr.open_zarr(
#     "gs://gcp-public-data-arco-era5/co/model-level-moisture.zarr-v2/",
#     consolidated=True,
# )

# # Specific humidity (kg/kg) (q)
# # Ozone mass mixing ratio (kg/kg) (o3)
# # Specific cloud liquid water content (kg/kg) (clwc)
# # Specific cloud ice water content (kg/kg) (ciwc)
# # Specific cloud rain water content (kg/kg) (crwc)
# # Specific cloud snow water content (kg/kg) (cswc)
# # Fraction of cloud cover (0-1) (cc)

In [7]:
# era5_wind = xr.open_zarr(
#     "gs://gcp-public-data-arco-era5/co/model-level-wind.zarr-v2/",
#     consolidated=True,
# )

# # Divergence (1/s) (d)
# # Vorticity (1/s) (vo)
# # Temperature (K) (t)
# # Vertical velocity (Pa/s) (w)

In [9]:
# Open the ARCO-ERA5 single-level surface store from its GCS bucket, reading
# the consolidated zarr metadata.
era5_surface = xr.open_zarr('gs://gcp-public-data-arco-era5/co/single-level-surface.zarr-v2/', consolidated=True)

In [None]:
# Target grid definition, read from an absolute, site-specific path.
OURS_dataset = xr.open_dataset('/glade/campaign/cisl/aiml/ksha/CREDIT/ML_grid.nc')

# 1-D coordinate axes copied out of the dataset as plain numpy arrays.
x_OURS, y_OURS = (
    np.array(OURS_dataset[axis_name]) for axis_name in ('longitude', 'latitude')
)

# 2-D longitude/latitude fields built from the two axes, plus their shape.
lon_OURS, lat_OURS = np.meshgrid(x_OURS, y_OURS)
shape_OURS = lon_OURS.shape

In [115]:
# Peek at the tail of the time coordinate.
# NOTE: slice(-10, -1) stops before the final entry, so this displays 9
# timestamps (see the reported shape), not the last 10; slice(-10, None)
# would include the final one.
era5_surface['time'].isel(time=slice(-10, -1))

Unnamed: 0,Array,Chunk
Bytes,72 B,72 B
Shape,"(9,)","(9,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 72 B 72 B Shape (9,) (9,) Dask graph 1 chunks in 3 graph layers Data type datetime64[ns] numpy.ndarray",9  1,

Unnamed: 0,Array,Chunk
Bytes,72 B,72 B
Shape,"(9,)","(9,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray


In [116]:
# Number of entries on the time coordinate of the surface store.
len(era5_surface['time'])

1090608

In [118]:
# len() of the 'z' variable; the value printed matches len(time) from the
# previous cell, so the leading dimension of 'z' is presumably time.
len(era5_surface['z'])

1090608

In [10]:
# Select one timestamp label and load that selection into memory.
# The same label is used for both slice bounds; presumably this yields a
# single hourly step while keeping a length-1 time dimension -- TODO confirm.
datestring = "1987-10-16T03"
surface_slice = (
    era5_surface
    .sel(time=slice(datestring, datestring))
    .compute()
)

In [12]:
# Convert the loaded slice to a Metview fieldset. attribute_fix() removes the
# name-based GRIB attributes first (it edits surface_slice's attrs in place
# and returns the same object); squeeze() then drops length-1 dimensions.
surface_fieldset = mv.dataset_to_fieldset(
    attribute_fix(surface_slice).squeeze()
)

failed to set key 'stepUnits:int' to 1
failed to set key 'endStep:int' to 0
failed to set key 'stepUnits:int' to 1
failed to set key 'stepUnits:int' to 1
failed to set key 'endStep:int' to 0
failed to set key 'stepUnits:int' to 1


In [13]:
# Echo the object to confirm the conversion produced a Metview Fieldset.
surface_fieldset

<metview.bindings.Fieldset at 0x15230063c8d0>

In [83]:
# Pass the in-memory fieldset through mv.read() with a target grid of 'N320'.
# NOTE(review): presumably this regrids to the regular Gaussian N320 grid
# (hence the "_gg" suffix) -- confirm against Metview's grid naming.
surface_gg = mv.read(data=surface_fieldset, grid='N320')

In [105]:
# Summarise the fieldset contents; the recorded output lists one row per
# parameter (lnsp and z) with its level, date, time and paramId.
surface_gg.describe()

parameter,typeOfLevel,level,date,time,step,number,paramId,class,stream,type,experimentVersionNumber
lnsp,hybrid,1,19871016,300,0,,152,,,,
z,hybrid,1,19871016,300,0,,129,,,,


In [109]:
# Pull the field values and the grid-point latitudes/longitudes out of the
# fieldset produced by mv.read(). The three calls run in the order listed.
# NOTE(review): presumably these come back as numpy arrays, as the first
# variable name suggests -- confirm shapes before relying on them.
surface_numpy_values, lat_points, lon_points = (
    surface_gg.values(),
    surface_gg.latitudes(),
    surface_gg.longitudes(),
)

In [111]:
# mv.write('output.nc', surface_gg_interp)
# test = xr.open_dataset('output.nc', engine='cfgrib')