In [1]:
# import functions
# OS interaction and time
import os
import sys
import cftime
import datetime
import time
import glob
import dask
import dask.bag as db
import calendar

# math and data
import numpy as np
import netCDF4 as nc
import xarray as xr
import scipy as sp
import pandas as pd
import pickle as pickle
from sklearn import linear_model
import matplotlib.patches as mpatches
from shapely.geometry.polygon import LinearRing
import statsmodels.stats.multitest as multitest

# plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import ticker
import matplotlib.colors as mcolors
from matplotlib.gridspec import GridSpec

from matplotlib.ticker import FormatStrFormatter
from mpl_toolkits.axes_grid1.axes_divider import HBoxDivider
import mpl_toolkits.axes_grid1.axes_size as Size
from mpl_toolkits.axes_grid1 import make_axes_locatable

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.util import add_cyclic_point

# random
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

In [2]:
# Directory layout used throughout the notebook.
# Every path keeps its trailing slash so file names can be appended directly.

# subsetted data
data_in_path = '/glade/u/home/zcleveland/scratch/ERA5/dsw/'

# subsetting scripts
sub_script_path = '/glade/u/home/zcleveland/ERA5_analysis/scripts/subsetting/'

# plotting scripts
plot_script_path = '/glade/u/home/zcleveland/ERA5_analysis/scripts/plotting/'

# generated figures
fig_out_path = '/glade/u/home/zcleveland/ERA5_analysis/figures/'

# temporary working directory
# NOTE(review): this path sits under the home analysis directory, not under
# the scratch/ directory used by data_in_path -- confirm the intended location.
temp_scratch_path = '/glade/u/home/zcleveland/ERA5_analysis/temp/'

In [3]:
# Variables to process: uncomment an entry to include it.
# Legend for the trailing tag on each entry:
#   accumulated   -- value is accumulated
#   instantaneous -- value is instantaneous
var_list = [
    # --- precipitation / snow ---
    # 'lsp',    # large scale precipitation (m of water), accumulated
    # 'cp',     # convective precipitation (m of water), accumulated
    # 'tp',     # total precipitation (m of water), accumulated -- DERIVED
    # 'sd',     # snow depth (m of water equivalent), instantaneous
    # --- pressure / cloud ---
    # 'msl',    # mean sea level pressure (Pa), instantaneous
    # 'tcc',    # total cloud cover (0-1), instantaneous
    # --- soil ---
    # 'stl1',   # soil temp layer 1 (K), instantaneous
    # 'stl2',   # soil temp layer 2 (K), instantaneous
    # 'stl3',   # soil temp layer 3 (K), instantaneous
    # 'stl4',   # soil temp layer 4 (K), instantaneous
    # 'swvl1',  # soil volume water content layer 1 (m^3 m^-3), instantaneous
    # 'swvl2',  # soil volume water content layer 2 (m^3 m^-3), instantaneous
    # 'swvl3',  # soil volume water content layer 3 (m^3 m^-3), instantaneous
    # 'swvl4',  # soil volume water content layer 4 (m^3 m^-3), instantaneous
    # --- near-surface state ---
    # '2t',     # 2 meter temp (K), instantaneous
    # '2d',     # 2 meter dew point (K), instantaneous
    # --- surface fluxes / radiation ---
    # 'ishf',   # instant surface heat flux (W m^-2), instantaneous
    # 'ie',     # instant moisture flux (kg m^-2 s^-1), instantaneous
    # 'sshf',   # surface sensible heat flux (J m^-2), accumulated
    # 'slhf',   # surface latent heat flux (J m^-2), accumulated
    # 'ssr',    # surface net solar radiation (J m^-2), accumulated
    # 'str',    # surface net thermal radiation (J m^-2), accumulated
    # --- runoff / snowfall ---
    # 'sro',    # surface runoff (m), accumulated
    # 'sf',     # total snowfall (m of water equivalent), accumulated
    # --- column quantities ---
    # 'cape',   # convective available potential energy (J kg^-1), instantaneous
    'tcw',      # total column water (kg m^-2) - sfc (sum total of solid, liquid, and vapor in a column)
]

In [6]:
# Per-grid-cell statistics (25th/75th percentile, median and mean, taken across
# years) of the annual minimum and annual maximum total column water over the
# dsw region. Results are printed and written to two NetCDF files.


def _summarize_over_years(annual_extremes):
    """Reduce a per-year DataArray over its 'year' dimension.

    Parameters
    ----------
    annual_extremes : xarray.DataArray
        Array with a 'year' dimension. When it is dask-backed, 'year' should be
        a single chunk before calling this (the caller rechunks for that
        reason).

    Returns
    -------
    xarray.Dataset
        Variables 'Q25', 'Q75', 'MEDIAN' and 'MEAN', in that order, each with
        the 'year' dimension reduced away.
    """
    # a single quantile pass for all three levels instead of three separate ones
    quartiles = annual_extremes.quantile([0.25, 0.5, 0.75], dim='year')

    stats = xr.Dataset()
    # drop=True discards the scalar 'quantile' coordinate left by the selection.
    # Without it the dataset keeps the first value assigned (0.25) and carries
    # that label alongside Q75, MEDIAN and MEAN, including in the saved file.
    stats['Q25'] = quartiles.isel(quantile=0, drop=True)     # 25th percentile
    stats['Q75'] = quartiles.isel(quantile=2, drop=True)     # 75th percentile
    stats['MEDIAN'] = quartiles.isel(quantile=1, drop=True)  # median
    stats['MEAN'] = annual_extremes.mean(dim='year')         # mean
    return stats


# open every file matching the tcw pattern as one dataset
files = sorted(glob.glob(f'{data_in_path}*/tcw_*_dsw.nc'))
if not files:
    # fail with a message that names the directory that was searched
    raise FileNotFoundError(f'no tcw_*_dsw.nc files found under {data_in_path}')
tcw = xr.open_mfdataset(files)

# yearly min/max, rechunked so the 'year' dimension is a single chunk
min_min = tcw['TCW_AVG'].groupby('time.year').min().chunk({'year': -1})
max_max = tcw['TCW_AVG'].groupby('time.year').max().chunk({'year': -1})

# statistics across years for the annual minima and maxima
tcw_min = _summarize_over_years(min_min)
tcw_max = _summarize_over_years(max_max)

# show the resulting datasets
print(tcw_min)
print(tcw_max)

# save datasets to netcdf files
tcw_min.to_netcdf(f'{data_in_path}tcw_min_stats.nc')
tcw_max.to_netcdf(f'{data_in_path}tcw_max_stats.nc')

<xarray.Dataset>
Dimensions:    (latitude: 81, longitude: 81)
Coordinates:
  * latitude   (latitude) float64 40.0 39.75 39.5 39.25 ... 20.5 20.25 20.0
  * longitude  (longitude) float64 240.0 240.2 240.5 240.8 ... 259.5 259.8 260.0
    quantile   float64 0.25
Data variables:
    Q25        (latitude, longitude) float64 dask.array<chunksize=(81, 81), meta=np.ndarray>
    Q75        (latitude, longitude) float64 dask.array<chunksize=(81, 81), meta=np.ndarray>
    MEDIAN     (latitude, longitude) float64 dask.array<chunksize=(81, 81), meta=np.ndarray>
    MEAN       (latitude, longitude) float32 dask.array<chunksize=(81, 81), meta=np.ndarray>
<xarray.Dataset>
Dimensions:    (latitude: 81, longitude: 81)
Coordinates:
  * latitude   (latitude) float64 40.0 39.75 39.5 39.25 ... 20.5 20.25 20.0
  * longitude  (longitude) float64 240.0 240.2 240.5 240.8 ... 259.5 259.8 260.0
    quantile   float64 0.25
Data variables:
    Q25        (latitude, longitude) float64 dask.array<chunksize=(81, 81), 