In [1]:
import sys

import numpy as np
import netCDF4 as nc
import numpy.matlib
import datetime
import xarray as xr
from scipy import interpolate
from numpy import ma
from scipy import stats
import scipy.io as sio
import pickle as pickle
from sklearn import linear_model
import numpy.ma as ma
import matplotlib.patches as mpatches
from shapely.geometry.polygon import LinearRing

import scipy as sp
import pandas as pd

import time

from copy import copy 

# Plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import ticker

from matplotlib.ticker import FormatStrFormatter
from mpl_toolkits.axes_grid1.axes_divider import HBoxDivider
import mpl_toolkits.axes_grid1.axes_size as Size
from mpl_toolkits.axes_grid1 import make_axes_locatable

# OS interaction
import os
import sys
import cftime

import cartopy.crs as ccrs
from cartopy.util import add_cyclic_point

from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

import matplotlib.colors as mcolors

import glob
import dask
import dask.bag as db

from scipy import interpolate

import statsmodels.stats.multitest as multitest

from matplotlib.gridspec import GridSpec
from cartopy.crs import EqualEarth, PlateCarree

In [60]:
# Directory holding the per-variable avg/max/min files that are read below and
# the merged yearly files that are written back. The trailing slash matters:
# file names are appended to it directly.
out_path = '/glade/u/home/zcleveland/scratch/ERA5/dsw/'

# Location of the ERA5 source collection (not referenced by the cells shown here).
era5_path = '/glade/campaign/collections/rda/data/ds633.0/'

In [61]:
# Short names of the variables to search for and combine.
# Order is preserved from the original list; it sets the processing order.
var_list = [
    # precipitation: large scale, convective
    'lsp', 'cp',
    # snow depth, mean sea level pressure, total cloud cover
    'sd', 'msl', 'tcc',
    # soil temp, layers 1-4
    'stl1', 'stl2', 'stl3', 'stl4',
    # soil volume water content, layers 1-4
    'swvl1', 'swvl2', 'swvl3', 'swvl4',
    # 2 meter temp, 2 meter dew point
    '2t', '2d',
    # instant surface heat flux, instant moisture flux
    'ishf', 'ie',
]

In [62]:
# Time axes to loop through. np.arange excludes its stop value, so the
# arrays below cover months 1..12 and years 1980..2019.
months = np.arange(1, 13)
years = np.arange(1980, 2020)

In [83]:
# Merge the per-statistic yearly files of every variable into one file per year.
#
# Inputs : {out_path}{var}_{YYYY}01_{YYYY}12_dsw_{avg|max|min}.nc
# Output : {out_path}{var}_{YYYY}01_{YYYY}12_dsw.nc, in which the matched data
#          variable of each input is renamed with an _AVG / _MAX / _MIN suffix.
# Years whose output file already exists are skipped, so the cell can be re-run.
sub_vars = ['avg', 'max', 'min']

for var in var_list:

    # Only variables with at least one avg/max/min file on disk are processed.
    if not any(glob.glob(f'{out_path}{var}_*_{sub_var}.nc') for sub_var in sub_vars):
        continue

    print(f'found avg/max/min in {var}. processing...\n')

    for year in years:

        start_date = int(f'{year}01')
        end_date = int(f'{year}12')
        out_file = f'{out_path}{var}_{start_date}_{end_date}_dsw.nc'

        if os.path.exists(out_file):
            print(f'{out_file} already exists\n skipping...\n')
            continue

        # The existence check above spans all years, so an individual year can
        # still lack one of the statistics. open_mfdataset raises on an empty
        # file list, so report such years and move on instead of aborting.
        files_by_sub_var = {
            sub_var: sorted(glob.glob(f'{out_path}{var}_{start_date}_{end_date}_dsw_{sub_var}.nc'))
            for sub_var in sub_vars
        }
        missing = [sub_var for sub_var, files in files_by_sub_var.items() if not files]
        if missing:
            print(f'{out_file} is missing input files for {missing}\n skipping...\n')
            continue

        print(out_file)
        opened = []     # datasets exactly as returned by open_mfdataset; closed in `finally`
        data_list = []  # renamed datasets handed to xr.merge

        try:
            for sub_var, files in files_by_sub_var.items():
                with dask.config.set(**{'array.slicing.split_large_chunks': True}):
                    ds = xr.open_mfdataset(files, concat_dim='time', combine='nested', parallel=True, chunks={'time': 'auto'})
                    # Track the original object: close() has to be called on the
                    # dataset that owns the file handles, not on a renamed copy.
                    opened.append(ds)

                    # Pick the first data variable whose name contains the
                    # upper-cased short name.
                    matches = [varx for varx in ds.data_vars if var.upper() in varx]
                    if not matches:
                        raise LookupError(
                            f'no data variable containing {var.upper()!r} in {files}; '
                            f'found {list(ds.data_vars)}'
                        )
                    var_xx = matches[0]
                    data_list.append(ds.rename_vars({var_xx: f'{var_xx}_{sub_var.upper()}'}))

            print()
            print('merging data')
            # compat='override' skips the equality check on variables shared by
            # the inputs and keeps the copy from the first dataset.
            data = xr.merge(data_list, compat='override')
            print('writing data\n')

            # Write to a temporary name and rename afterwards, so an interrupted
            # run never leaves a truncated file that the "already exists" check
            # above would silently accept on the next run.
            tmp_file = f'{out_file}.tmp'
            data.to_netcdf(tmp_file)
            os.replace(tmp_file, out_file)
        finally:
            # Release file handles before moving on to the next year.
            for ds in opened:
                ds.close()

print('done')
        

found avg/max/min in sd. processing...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198001_198012_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198101_198112_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198201_198212_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198301_198312_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198401_198412_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198501_198512_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198601_198612_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198701_198712_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198801_198812_dsw.nc already exists
 skipping...

/glade/u/home/zcleveland/scratch/ERA5/dsw/sd_198901_198912_dsw.nc already exists
 skipping...

/glade/u/h