# Preprocessing (accumulated) surface wind stress from ERA5

In [1]:
import os
import sys
import yaml
import dask
import zarr
import numpy as np
import xarray as xr
import pandas as pd
from glob import glob

import calendar
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

sys.path.insert(0, os.path.realpath('../libs/'))
import verif_utils as vu

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Target year to process.
year = 1979
# Number of days in the target year. calendar.isleap applies the full Gregorian
# rule (century years are leap only when divisible by 400); a bare
# `year % 4 == 0` test misclassifies years such as 1900 or 2100.
N_days = 366 if calendar.isleap(year) else 365

# Absolute path of the YAML configuration; the relative name
# 'data_config_6h.yml' is resolved against the current working directory.
config_name = os.path.realpath('data_config_6h.yml')

# Parse the YAML file into `conf`; the cells below read their settings
# from conf['ARCO'][...].
with open(config_name, 'r') as stream:
    conf = yaml.safe_load(stream)

In [4]:
# --- zarr output settings ---

# Output directory for the accumulated-variable stores. os.path.join with a
# trailing '' keeps the final path separator (the save cell builds the file
# name by string concatenation onto base_dir) and also yields a correct path
# when conf['ARCO']['save_loc'] does not end with a separator.
base_dir = os.path.join(conf['ARCO']['save_loc'], 'accum', '')
# exist_ok=True creates the directory if needed without the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(base_dir, exist_ok=True)

# Blosc/zstd compressor shared by every encoded variable.
compress = zarr.Blosc(cname='zstd', clevel=1, shuffle=zarr.Blosc.SHUFFLE, blocksize=0)

# Chunk shape in (time, latitude, longitude) order, taken from the config.
chunk_size_3d = dict(chunks=(conf['ARCO']['chunk_size_3d']['time'],
                             conf['ARCO']['chunk_size_3d']['latitude'],
                             conf['ARCO']['chunk_size_3d']['longitude']))

# Per-variable encoding: every name listed in conf['ARCO']['varname_accum']
# gets its own dict holding the shared compressor and chunk shape.
# NOTE(review): confirm these names match the variables actually written by
# the save cell.
dict_encoding = {
    var: {'compressor': compress, **chunk_size_3d}
    for var in conf['ARCO']['varname_accum']
}

In [5]:
# Open the public ARCO ERA5 hourly store with anonymous credentials.
# chunks=None opens the store without dask chunking.
ERA5_1h = xr.open_zarr(
    "gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3",
    chunks=None,
    storage_options=dict(token='anon'),)

# The read window starts on Dec 31 of the previous year, one day before the
# first saved timestamp (time_start_save), so the earliest 6-hourly sum can
# use hours from the preceding day.
time_start = '{}-12-31T00'.format(year-1)
time_start_save = '{}-01-01T00'.format(year)
# NOTE(review): only the first three days of the year are selected here; the
# full-year end time is the commented-out alternative below. Presumably a
# test setting -- confirm before a production run.
time_end = '{}-01-03T23'.format(year)
#time_end = '{}-12-31T23'.format(year)
# Hourly data restricted to [time_start, time_end].
ERA5_1h_yearly = ERA5_1h.sel(time=slice(time_start, time_end))

# Surface-stress variables to extract from the hourly store, each mapped to
# None. NOTE(review): None presumably means "no level selection" for these
# single-level fields -- confirm against vu.ds_subset_everything.
variables_levels = dict.fromkeys(
    (
        'eastward_gravity_wave_surface_stress',
        'eastward_turbulent_surface_stress',
        'northward_gravity_wave_surface_stress',
        'northward_turbulent_surface_stress',
    ),
    None,
)

# Reduce the hourly window to the requested variables using the project helper.
ERA5_1h_save = vu.ds_subset_everything(ERA5_1h_yearly, variables_levels)

# Move every hourly value one step earlier along time (the value stamped
# t+1h now sits at t). Each 6-hour resample bin labelled T then sums the
# values originally stamped T+1h ... T+6h.
# The last time step is NaN after the shift, so the bin containing it is a
# partial sum; with an end time at hour 23 that bin is relabelled past
# time_end and removed by the selection below.
ERA5_1h_shifted = ERA5_1h_save.shift(time=-1)
ERA5_6h = ERA5_1h_shifted.resample(time='6h').sum()
# Relabel each bin by its end time: the sum of T+1h ... T+6h is stamped T+6h.
ERA5_6h['time'] = ERA5_6h['time'] + pd.Timedelta(hours=6)

# Keep only timestamps inside [time_start_save, time_end], then rechunk with
# the chunk sizes from the config.
ERA5_6h_save = ERA5_6h.sel(time=slice(time_start_save, time_end))
ERA5_6h_save = ERA5_6h_save.chunk(conf['ARCO']['chunk_size_3d'])

# Output store path: <base_dir><prefix>_surf_stress_<year>.zarr
save_name = base_dir + conf['ARCO']['prefix'] + '_surf_stress_{}.zarr'.format(year)
# NOTE(review): the write is commented out, so running this cell saves nothing.
# ERA5_6h_save.to_zarr(save_name, mode="w", consolidated=True, compute=True, encoding=dict_encoding)

In [6]:
# Show the output path built in the previous cell.
# NOTE(review): the recorded output below ends in '_accum_1979.zarr', while the
# path is built with the '_surf_stress_{}.zarr' pattern -- the stored output
# looks stale; re-run the notebook to refresh it.
save_name

'/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_base/accum/ERA5_plevel_6h_accum_1979.zarr'

In [7]:
# Display the rechunked 6-hourly dataset prepared for saving.
ERA5_6h_save

Unnamed: 0,Array,Chunk
Bytes,47.53 MiB,39.61 MiB
Shape,"(12, 721, 1440)","(10, 721, 1440)"
Dask graph,2 chunks in 1 graph layer,2 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 47.53 MiB 39.61 MiB Shape (12, 721, 1440) (10, 721, 1440) Dask graph 2 chunks in 1 graph layer Data type float32 numpy.ndarray",1440  721  12,

Unnamed: 0,Array,Chunk
Bytes,47.53 MiB,39.61 MiB
Shape,"(12, 721, 1440)","(10, 721, 1440)"
Dask graph,2 chunks in 1 graph layer,2 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,47.53 MiB,39.61 MiB
Shape,"(12, 721, 1440)","(10, 721, 1440)"
Dask graph,2 chunks in 1 graph layer,2 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 47.53 MiB 39.61 MiB Shape (12, 721, 1440) (10, 721, 1440) Dask graph 2 chunks in 1 graph layer Data type float32 numpy.ndarray",1440  721  12,

Unnamed: 0,Array,Chunk
Bytes,47.53 MiB,39.61 MiB
Shape,"(12, 721, 1440)","(10, 721, 1440)"
Dask graph,2 chunks in 1 graph layer,2 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,47.53 MiB,39.61 MiB
Shape,"(12, 721, 1440)","(10, 721, 1440)"
Dask graph,2 chunks in 1 graph layer,2 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 47.53 MiB 39.61 MiB Shape (12, 721, 1440) (10, 721, 1440) Dask graph 2 chunks in 1 graph layer Data type float32 numpy.ndarray",1440  721  12,

Unnamed: 0,Array,Chunk
Bytes,47.53 MiB,39.61 MiB
Shape,"(12, 721, 1440)","(10, 721, 1440)"
Dask graph,2 chunks in 1 graph layer,2 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,47.53 MiB,39.61 MiB
Shape,"(12, 721, 1440)","(10, 721, 1440)"
Dask graph,2 chunks in 1 graph layer,2 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 47.53 MiB 39.61 MiB Shape (12, 721, 1440) (10, 721, 1440) Dask graph 2 chunks in 1 graph layer Data type float32 numpy.ndarray",1440  721  12,

Unnamed: 0,Array,Chunk
Bytes,47.53 MiB,39.61 MiB
Shape,"(12, 721, 1440)","(10, 721, 1440)"
Dask graph,2 chunks in 1 graph layer,2 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
