In [1]:
# import functions
# OS interaction and time
import os
import sys
import cftime
import datetime
import time
import glob
import dask
import dask.bag as db
import calendar

# math and data
import numpy as np
import netCDF4 as nc
import xarray as xr
import scipy as sp
from scipy.signal import detrend
import pandas as pd
import pickle as pickle
from sklearn import linear_model
import matplotlib.patches as mpatches
from shapely.geometry.polygon import LinearRing
import statsmodels.stats.multitest as multitest

# plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import ticker
import matplotlib.colors as mcolors
from matplotlib.gridspec import GridSpec

from matplotlib.ticker import FormatStrFormatter
from mpl_toolkits.axes_grid1.axes_divider import HBoxDivider
import mpl_toolkits.axes_grid1.axes_size as Size
from mpl_toolkits.axes_grid1 import make_axes_locatable

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.util import add_cyclic_point

# random
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

In [2]:
# Root directory of the ERA5 source collection (ds633.0 per the path).
# NOTE(review): not referenced by the cells visible here -- presumably used elsewhere.
era5_path = '/glade/campaign/collections/rda/data/ds633.0/'
# Working directory for derived ERA5 products; the functions below read and write
# files under its 'dsw/' subdirectory.  The trailing slash matters because some
# paths are built by plain f-string concatenation rather than os.path.join.
my_era5_path = '/glade/u/home/zcleveland/scratch/ERA5/'

In [3]:
# # calculate total precipitation using cp and lsp
# var_list = ['lsp', 'cp']
# for year in range(1980,2020):
#     lsp = xr.open_dataset(f'{my_era5_path}dsw/{year}/lsp_{year}01_{year}12_dsw.nc')
#     cp = xr.open_dataset(f'{my_era5_path}dsw/{year}/cp_{year}01_{year}12_dsw.nc')

#     temp = lsp['LSP']+cp['CP']
#     tp = xr.Dataset({'TP': temp})
#     tp.to_netcdf(f'{my_era5_path}dsw/tp_{year}01_{year}_12_dsw.nc')

In [None]:
# calculate precipitation
def calc_NAM_tp(overwrite_flag=False):
    """Sum total precipitation between NAM onset and retreat and save it.

    For every element of the onset/retreat arrays, the ``TP`` time series at
    the matching grid point is summed over the half-open window
    ``[onset, retreat)`` -- the retreat day itself is not included.  Elements
    whose onset or retreat date is missing yield NaN.  The result is written
    to ``{my_era5_path}dsw/NAM_precipitation.nc`` as variable
    ``precipitation``.

    Parameters
    ----------
    overwrite_flag : bool, default False
        When the output file already exists it is only recomputed and
        replaced if this is True; otherwise a notice is printed and the
        function returns without doing any work.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If no ``tp_*_dsw.nc`` files are found below ``{my_era5_path}dsw/*/``.
    AssertionError
        If onset, retreat and TP do not share identical latitude/longitude.
    """

    # check existence of file
    out_fp = f'{my_era5_path}dsw/NAM_precipitation.nc'
    if os.path.exists(out_fp):
        print('NAM_tp file already exists at:\n', out_fp)
        if not overwrite_flag:  # skip and don't overwrite
            print('overwrite_flag set to False.  Set to True to overwrite.')
            return
        # overwrite current file
        print('overwrite_flag set to True.  Overwriting . . .')

    # locate the yearly total-precipitation files up front so a missing
    # directory produces a clear error instead of an opaque xarray failure
    tp_pattern = f'{my_era5_path}dsw/*/tp_*_dsw.nc'
    tp_files = sorted(glob.glob(tp_pattern))
    if not tp_files:
        raise FileNotFoundError(f'no total precipitation files match {tp_pattern}')

    # open datasets for onset, retreat, and total precipitation; everything is
    # loaded into memory so the file handles can be released right away
    with xr.open_dataset(f'{my_era5_path}dsw/NAM_onset.nc') as onset_ds:
        onset = onset_ds['onset_date'].load()
    with xr.open_dataset(f'{my_era5_path}dsw/NAM_retreat.nc') as retreat_ds:
        retreat = retreat_ds['retreat_date'].load()
    with xr.open_mfdataset(tp_files) as tp_ds:
        tp = tp_ds['TP'].load()

    # ensure the datasets have the same lat/lon before apply_ufunc
    assert (onset.latitude == retreat.latitude).all()
    assert (onset.longitude == retreat.longitude).all()
    assert (onset.latitude == tp.latitude).all()
    assert (onset.longitude == tp.longitude).all()

    # positional lookups below require an ascending time axis
    if not tp.indexes['time'].is_monotonic_increasing:
        tp = tp.sortby('time')

    # Calendar day of every TP time step, computed once and shared by all grid
    # points.  Deriving positions from TP's own time coordinate (instead of a
    # hard-coded 1980-2019 calendar) keeps the window aligned with the data
    # that were actually loaded.
    # NOTE(review): assumes TP's time coordinate decodes to datetime64 -- confirm.
    time_days = tp['time'].values.astype('datetime64[D]')

    # define a function to calculate tp between onset and retreat dates
    def calculate_total_precip(onset_date, retreat_date, tp_series):
        """Return the sum of ``tp_series`` over ``[onset_date, retreat_date)``.

        ``tp_series`` is the 1-D precipitation series of one grid point along
        ``time``; NaN is returned when either date is missing.
        """
        if pd.isnull(onset_date) or pd.isnull(retreat_date):
            return np.nan

        # first time step on/after each date; identical to an exact positional
        # match on a gap-free daily axis, but never raises for dates that fall
        # outside the loaded period (the window is then simply empty)
        window = np.array([onset_date, retreat_date]).astype('datetime64[D]')
        start, stop = np.searchsorted(time_days, window, side='left')
        return np.sum(tp_series[start:stop])

    # apply the function using xr.apply_ufunc
    NAM_tp = xr.apply_ufunc(
        calculate_total_precip,
        onset,
        retreat,
        tp,
        input_core_dims=[[], [], ['time']],
        output_core_dims=[[]],
        vectorize=True,
        dask='parallelized',  # only relevant for dask-backed inputs; kept for compatibility
        output_dtypes=[tp.dtype]
    )

    # save to netcdf file
    NAM_tp.rename('precipitation').to_netcdf(out_fp)

In [41]:
# define a function to calculate the precipitation rate for NAM
# this is total precip during NAM divided by length of NAM
def calc_NAM_precip_rate(overwrite_flag=False):
    """Compute the NAM precipitation rate and save it to NetCDF.

    Reads ``precipitation`` from ``dsw/NAM_precipitation.nc`` and ``length``
    from ``dsw/NAM_length.nc`` (both below ``my_era5_path``), divides the
    former by the latter element-wise, and writes the result to
    ``dsw/NAM_precipitation-rate.nc`` as variable ``precipitation-rate``.

    Parameters
    ----------
    overwrite_flag : bool, default False
        When the output file already exists it is only recomputed and
        replaced if this is True; otherwise a notice is printed and the
        function returns without doing any work.

    Returns
    -------
    None
    """

    # create outfile name and check if it already exists
    out_fp = os.path.join(my_era5_path, 'dsw/NAM_precipitation-rate.nc')
    if os.path.exists(out_fp):
        print('precipitation-rate file already exists.')
        if not overwrite_flag:
            print('overwrite_flag set to False.  Set to True to overwrite.')
            return
        print('overwrite_flag set to True.  Overwriting . . .')

    precip_fp = os.path.join(my_era5_path, 'dsw/NAM_precipitation.nc')
    length_fp = os.path.join(my_era5_path, 'dsw/NAM_length.nc')

    # The inputs are read lazily, so the division and the write both happen
    # inside the context managers; the file handles are released afterwards
    # even if the computation or the write fails.
    with xr.open_dataset(precip_fp) as precip_ds, xr.open_dataset(length_fp) as length_ds:
        # calculate precip rate
        precipitation_rate = precip_ds['precipitation'] / length_ds['length']

        # rename dataset and save to netcdf file
        precipitation_rate.rename('precipitation-rate').to_netcdf(out_fp)