In [1]:
# import functions
# OS interaction and time
import os
import sys
import cftime
import datetime
import time
import glob
import dask
import dask.bag as db
import calendar
import importlib

# math and data
import math
import numpy as np
import netCDF4 as nc
import xarray as xr
import scipy as sp
import scipy.linalg
from scipy.signal import detrend
import pandas as pd
import pickle as pickle
from sklearn import linear_model
import matplotlib.patches as mpatches
from shapely.geometry.polygon import LinearRing
import statsmodels.stats.multitest as multitest

# plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import ticker
import matplotlib.colors as mcolors
from matplotlib.gridspec import GridSpec
import matplotlib.image as mpimg
from matplotlib.colors import TwoSlopeNorm
import matplotlib.cm as cm

from matplotlib.ticker import FormatStrFormatter
from mpl_toolkits.axes_grid1.axes_divider import HBoxDivider
import mpl_toolkits.axes_grid1.axes_size as Size
from mpl_toolkits.axes_grid1 import make_axes_locatable

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.util import add_cyclic_point

# random
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

# ---- data and output locations ----
# path to subset data
my_era5_path = '/glade/u/home/zcleveland/scratch/ERA5/'
# path to misc data
misc_data_path = '/glade/u/home/zcleveland/scratch/misc_data/'
# path to generated plots
plot_out_path = '/glade/u/home/zcleveland/NAM_soil-moisture/ERA5_analysis/plots/'
# path to my dicts, lists, and functions
scripts_main_path = '/glade/u/home/zcleveland/NAM_soil-moisture/scripts_main/'

# make the helper scripts importable; inserted at the front of sys.path so
# they are found before any same-named module elsewhere
if scripts_main_path not in sys.path:
    sys.path.insert(0, scripts_main_path)

# when this cell is re-run, reload every helper module that is already loaded
# so that edits to the scripts take effect without restarting the kernel
# (membership is checked right before each reload, in this fixed order)
for _module_name in ('get_var_data', 'my_functions', 'my_dictionaries', 'order_years'):
    if _module_name in sys.modules:
        importlib.reload(sys.modules[_module_name])
del _module_name  # leave no extra name behind in the notebook namespace

# import common functions that I've created
from get_var_data import get_var_data, get_var_files, open_var_data, subset_var_data, time_to_year_month_avg, time_to_year_month_sum, time_to_year_month
from my_functions import month_num_to_name, ensure_var_list
from order_years import *  # order_years(var, months, **kwargs)

# import lists and dictionaries
import my_dictionaries
# Bind the lists and dictionaries defined in my_dictionaries to notebook-level
# names so they can be used without the module prefix.
# my lists
sfc_instan_list = my_dictionaries.sfc_instan_list  # instantaneous surface variables
sfc_accumu_list = my_dictionaries.sfc_accumu_list  # accumulated surface variables
pl_var_list = my_dictionaries.pl_var_list  # pressure level variables
invar_var_list = my_dictionaries.invar_var_list  # invariant variables
NAM_var_list = my_dictionaries.NAM_var_list  # NAM-based variables
region_avg_list = my_dictionaries.region_avg_list  # region IDs for regional averages
flux_var_list = my_dictionaries.flux_var_list  # flux variables that need to be flipped (e.g., sensible heat, so that it is positive up instead of down)
misc_var_list = my_dictionaries.misc_var_list  # misc variables
# my dictionaries
var_dict = my_dictionaries.var_dict  # variables and their names
var_units = my_dictionaries.var_units  # variable units
region_avg_dict = my_dictionaries.region_avg_dict  # region IDs and names
region_avg_coords = my_dictionaries.region_avg_coords  # coordinates for regions
region_colors_dict = my_dictionaries.region_colors_dict  # colors to plot for each region

In [3]:
def convert_longitude(ds):
    """Return `ds` with its `longitude` coordinate wrapped into [0, 360) and sorted.

    Parameters
    ----------
    ds : object with a `longitude` coordinate (e.g. an xarray Dataset)

    Returns
    -------
    A new object of the same kind; the input is not reassigned in the caller.
    Values are computed as (longitude + 360) % 360, then the data are
    reordered so that longitude is ascending.
    """
    # wrap first: negative longitudes move to the upper half of the 0-360 range
    wrapped_lon = (ds.longitude + 360) % 360
    shifted = ds.assign_coords(longitude=wrapped_lon)
    # wrapping leaves the axis out of order, so sort by the new coordinate
    return shifted.sortby(shifted.longitude)

In [10]:
def subset_daily_avg(ds, var):
    """Build daily mean, max and min fields for one variable of `ds`.

    Parameters
    ----------
    ds : dataset with a `time` dimension (e.g. an xarray Dataset)
    var : str
        Variable key; matched case-insensitively as a substring of the data
        variable names. The first matching data variable is used.

    Returns
    -------
    The result of `xr.merge` over three daily-resampled arrays named
    `<name>_AVG`, `<name>_MAX` and `<name>_MIN` (in that order), where
    `<name>` is the matched data variable name.

    Raises
    ------
    IndexError
        If no data variable name contains `var`.
    """
    # first data variable whose name contains the requested key (case-insensitive)
    candidates = [name for name in ds.data_vars.keys() if var.upper() in name.upper()]
    var_name = candidates[0]

    # group the selected variable into 1-day bins along time
    daily = ds[var_name].resample(time='1D')

    # reduce each daily bin three ways; the order fixes the variable order in the output
    pieces = []
    for reducer, suffix in (('mean', 'AVG'), ('max', 'MAX'), ('min', 'MIN')):
        reduced = getattr(daily, reducer)('time', skipna=True)
        pieces.append(reduced.rename(f'{var_name}_{suffix}'))

    # merge data
    return xr.merge(pieces)

In [8]:
def process_files(var, in_fp, out_fp):
    """Convert one input file to daily statistics and write the result.

    Opens `in_fp`, wraps longitude to 0-360 with `convert_longitude`, builds
    the daily mean/max/min with `subset_daily_avg`, and writes the merged
    result to `out_fp`.

    Parameters
    ----------
    var : str
        Variable key passed through to `subset_daily_avg`.
    in_fp : str
        Path of the netCDF file to read.
    out_fp : str
        Path of the netCDF file to write.

    Returns
    -------
    None
    """
    # The context manager closes the input file even if processing fails;
    # without it every call leaves an open handle behind, which adds up when
    # this is called once per year in a loop.
    with xr.open_dataset(in_fp) as ds:
        ds_converted = convert_longitude(ds)
        ds_daily = subset_daily_avg(ds_converted, var)
        # write while the source is still open: the data may be loaded lazily
        ds_daily.to_netcdf(out_fp)

In [11]:
# run the code: process one 'cin' input file per year and write one output file per year
# (the guard is true when this runs as the top-level script / notebook cell)
if __name__ == '__main__':
    years = np.arange(1980,2020)  # 1980 through 2019; arange excludes the stop value
    for year in years:
        print(f'year: {year}')  # progress marker, one line per year
        in_fp = f'{my_era5_path}temp/cin_{year}.nc'  # per-year input under temp/
        out_fp = f'{my_era5_path}dsw/{year}/cin_{year}01_{year}12_dsw.nc'  # per-year output under dsw/<year>/
        process_files('cin', in_fp, out_fp)

year: 1980
year: 1981
year: 1982
year: 1983
year: 1984
year: 1985
year: 1986
year: 1987
year: 1988
year: 1989
year: 1990
year: 1991
year: 1992
year: 1993
year: 1994
year: 1995
year: 1996
year: 1997
year: 1998
year: 1999
year: 2000
year: 2001
year: 2002
year: 2003
year: 2004
year: 2005
year: 2006
year: 2007
year: 2008
year: 2009
year: 2010
year: 2011
year: 2012
year: 2013
year: 2014
year: 2015
year: 2016
year: 2017
year: 2018
year: 2019
