#### Climatology of Buoyancy measures
1. how large a BL deviation is required to trigger the initiation of MCSs
2. whether BL ends in a state approaching the local climatology

In [1]:
import os
import sys
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from datetime import datetime
from pathlib import Path

import cartopy.crs as ccrs
import cartopy.feature as cfeat
from cartopy.util import add_cyclic_point
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

from scipy.stats import linregress

import warnings

In [2]:
# silence every warning category for the rest of the notebook session
warnings.simplefilter('ignore')

In [3]:
# global matplotlib font configuration
# NOTE(review): the original note on this cell says "Arial", yet the family is
# set to "Open Sans"; 'font.sans-serif' is presumably only consulted when the
# family is the generic 'sans-serif' -- confirm which font is intended.
matplotlib.rcParams.update({
    'font.family': "Open Sans",
    'font.sans-serif': "Arial",
})

In [4]:
# %%time
# # get climatology of BL
# BL_dir = Path('/neelin2020/ERA-5_buoy/layer_thetae/')

# year_list = np.arange(2002,2015)
# ds_BL_yr = []

# for year in year_list:
    
#     print('processing year: {}'.format(year))
#     ds_BL = []

#     for month in range(1,13):
        
#         files = sorted(list(BL_dir.glob('era5_2layers_thetae_{}_{}_*.nc'.format(year,str(month).zfill(2)))))
#         data = xr.open_mfdataset(files)

#         # read thetae components
#         thetae_bl = data.thetae_bl
#         thetae_sat_lt = data.thetae_sat_lt
#         thetae_lt = data.thetae_lt

#         delta_pl=400
#         delta_pb=100
#         wb=(delta_pb/delta_pl)*np.log((delta_pl+delta_pb)/delta_pb)
#         wl=1-wb

#         # calculate buoyancy estimate
#         Buoy_CAPE = (9.81/(340*3)) * wb * ((thetae_bl-thetae_sat_lt)/thetae_sat_lt) * 340
#         Buoy_CAPE_xr = Buoy_CAPE.mean('time').rename('Buoy_CAPE').to_dataset()
#         Buoy_SUBSAT = (9.81/(340*3))* wl * ((thetae_sat_lt-thetae_lt)/thetae_sat_lt) * 340
#         Buoy_SUBSAT_xr = Buoy_SUBSAT.mean('time').rename('Buoy_SUBSAT').to_dataset()
#         Buoy_TOT = Buoy_CAPE - Buoy_SUBSAT
#         Buoy_TOT_xr = Buoy_TOT.mean('time').rename('Buoy_TOT').to_dataset()

#         BL_measures_xr = xr.merge([Buoy_TOT_xr, Buoy_CAPE_xr, Buoy_SUBSAT_xr]).compute()

#         ds_BL.append(BL_measures_xr)
    
#     # climatology of monthly BL measures
#     ds_BL_yr.append(xr.concat(ds_BL, dim=pd.Index(np.arange(1,13), name='month')))
    
# ds_BL_climat = xr.concat(ds_BL_yr, dim=pd.Index(year_list, name='year'))

# # save derived dataset
# out_dir = Path('/neelin2020/mcs_flextrkr/mcs_stats/output_stats')
# ds_BL_climat.to_netcdf(out_dir / 'era5_BL_measures_monthly_avg.2002-2014.nc')

In [5]:
# landsea_mask = xr.open_dataset('/neelin2020/RGMA_feature_mask/ERA5_LandSeaMask_regrid.nc4').landseamask.sel(latitude=slice(-30,30))

#### 5-deg mean hourly BL,cape-subsat histogram

In [None]:
# %%time
# # get climatology of BL
# BL_dir = Path('/neelin2020/ERA-5_buoy/layer_thetae/')

# year_list = np.arange(2002,2015)
# ds_BL_yr = []

# BL_bins = np.arange(-35,10.5,0.5)
# samples = np.zeros((len(year_list), 12, 2, 2, len(BL_bins)-1)) # (year, month, grid_type, surface_type, samples)

# # load era-5 land sea mask
# landsea_mask = xr.open_dataset('/neelin2020/RGMA_feature_mask/ERA5_LandSeaMask_regrid.nc4').landseamask.sel(latitude=slice(-30,30))

# # interpolating variables into 5-deg 
# deg_binsize= 5
# lon_bins = np.arange(0,360+deg_binsize,deg_binsize)
# lat_bins = np.arange(-30,30+deg_binsize,deg_binsize)
# landsea_mask_regrid = landsea_mask.interp(longitude=lon_bins[:-1]+deg_binsize/2
#                                          ,latitude=lat_bins[:-1]+deg_binsize/2)
# landsea_mask_regrid = landsea_mask_regrid.rename({'latitude': 'lat'})
# landsea_mask_regrid = landsea_mask_regrid.rename({'longitude': 'lon'})

# landsea_mask_orig = landsea_mask.rename({'latitude': 'lat'})
# landsea_mask_orig = landsea_mask_orig.rename({'longitude': 'lon'})

# for y,year in enumerate(year_list):
    
#     print('processing year: {}'.format(year))

#     for m,month in enumerate(range(1,13)):
        
#         files = sorted(list(BL_dir.glob('era5_2layers_thetae_{}_{}_*.nc'.format(year,str(month).zfill(2)))))
        
#         for file in files:
            
#             try:
#                 data = xr.open_dataset(file).sel(lat=slice(-30,30)) # read a single file of instantaneous BL measure
#                 # read thetae components
#                 thetae_bl = data.thetae_bl
#                 thetae_sat_lt = data.thetae_sat_lt
#                 thetae_lt = data.thetae_lt

#                 delta_pl=400
#                 delta_pb=100
#                 wb=(delta_pb/delta_pl)*np.log((delta_pl+delta_pb)/delta_pb)
#                 wl=1-wb

#                 # calculate buoyancy estimate
#                 Buoy_CAPE = (9.81/(340*3)) * wb * ((thetae_bl-thetae_sat_lt)/thetae_sat_lt) * 340
#                 Buoy_SUBSAT = (9.81/(340*3))* wl * ((thetae_sat_lt-thetae_lt)/thetae_sat_lt) * 340

#                 Buoy_CAPE_regrid = Buoy_CAPE.interp(lat=lat_bins[:-1]+deg_binsize/2
#                                                  , lon=lon_bins[:-1]+deg_binsize/2) 
#                 Buoy_SUBSAT_regrid = Buoy_SUBSAT.interp(lat=lat_bins[:-1]+deg_binsize/2
#                                                  , lon=lon_bins[:-1]+deg_binsize/2) 
                
#                 # calculate buoyancy total (deg. K)
#                 Buoy_TOT_orig = (340*3)/9.81*(Buoy_CAPE.interp(lat=landsea_mask_orig.lat,lon=landsea_mask_orig.lon) 
#                                  - Buoy_SUBSAT.interp(lat=landsea_mask_orig.lat,lon=landsea_mask_orig.lon))
#                 Buoy_TOT_regrid = (340*3)/9.81*(Buoy_CAPE_regrid - Buoy_SUBSAT_regrid)
                
#                 # joint histograms for ocean
#                 Buoy_TOT_orig_ocean = Buoy_TOT_orig.where(landsea_mask_orig == 100)
#                 Buoy_TOT_orig_1d = Buoy_TOT_orig_ocean.values.ravel()
#                 Buoy_TOT_ocean = Buoy_TOT_regrid.where(landsea_mask_regrid == 100)
#                 Buoy_TOT_1d = Buoy_TOT_ocean.values.ravel()
                
#                 for i in range(len(BL_bins)-1):
#                     idx = np.where(np.logical_and(Buoy_TOT_orig_1d >= BL_bins[i], Buoy_TOT_orig_1d < BL_bins[i+1]))[0]
#                     samples[y,m,0,0,i] += len(idx) # (year,month,grid,surface_type,bins)
                        
#                 for i in range(len(BL_bins)-1):
#                     idx = np.where(np.logical_and(Buoy_TOT_1d >= BL_bins[i], Buoy_TOT_1d < BL_bins[i+1]))[0]
#                     samples[y,m,1,0,i] += len(idx)

#                 # joint histograms for land
#                 Buoy_TOT_orig_land = Buoy_TOT_orig.where(landsea_mask_orig < 100)
#                 Buoy_TOT_orig_1d = Buoy_TOT_orig_land.values.ravel()
#                 Buoy_TOT_land = Buoy_TOT_regrid.where(landsea_mask_regrid < 100)
#                 Buoy_TOT_1d = Buoy_TOT_land.values.ravel()
                
#                 for i in range(len(BL_bins)-1):
#                     idx = np.where(np.logical_and(Buoy_TOT_orig_1d >= BL_bins[i], Buoy_TOT_orig_1d < BL_bins[i+1]))[0]
#                     samples[y,m,0,1,i] += len(idx)
                
#                 for i in range(len(BL_bins)-1):
#                     idx = np.where(np.logical_and(Buoy_TOT_1d >= BL_bins[i], Buoy_TOT_1d < BL_bins[i+1]))[0]
#                     samples[y,m,1,1,i] += len(idx)
                
#             except:
#                 print('error file : {}'.format(file.name))
    
# # writeout as xarray dataset    
# ds_BL_climat = xr.Dataset(data_vars=dict(samples=(['year','month','grid_type','surface_type','BL_bins'],samples)),
#                           coords=dict(year=(['year'], year_list),
#                                       grid_type=(['grid_type'], ['0.25-deg','5-deg']),
#                                       surface_type=(['surface_type'], ['ocean','land']),
#                                       month=(['month'],np.arange(1,13)),
#                                       BL_bins=(['BL_bins'], BL_bins[:-1])))

# # # save derived dataset
# out_dir = Path('/scratch/wmtsai/temp_mcs/output_stats')
# ds_BL_climat.to_netcdf(out_dir / 'era5_BL_1Dhistogram.landsea.2002-2014.nc')

In [12]:
%%time
# get climatology of BL
# Accumulates, per (year, month, surface type), a joint 2-D histogram of the
# two buoyancy components (CAPE-like and SUBSAT-like terms) sampled on a
# coarse grid between 30S and 30N, then wraps the counts in an xarray Dataset.
BL_dir = Path('/neelin2020/ERA-5_buoy/layer_thetae/')

year_list = np.arange(2002,2010)

BL_cape_bins = np.arange(-15,10,0.5)
BL_subsat_bins = np.arange(-5,25,0.5)
# (year, month, surface_type, cape_bin, subsat_bin)
samples = np.zeros((len(year_list), 12, 2, len(BL_cape_bins)-1, len(BL_subsat_bins)-1))

# load era-5 land sea mask
landsea_mask = xr.open_dataset('/neelin2020/RGMA_feature_mask/ERA5_LandSeaMask_regrid.nc4').landseamask.sel(latitude=slice(-30,30))

# interpolating variables into 3-deg
# NOTE(review): `interp` samples values at the cell-centre points; it does not
# average over each 3-deg box, although the section heading says "mean" --
# confirm this is the intended regridding.
#deg_binsize= 0.25
deg_binsize = 3
lon_bins = np.arange(0,360+deg_binsize,deg_binsize)
lat_bins = np.arange(-30,30+deg_binsize,deg_binsize)
lon_centers = lon_bins[:-1]+deg_binsize/2
lat_centers = lat_bins[:-1]+deg_binsize/2
landsea_mask_regrid = landsea_mask.interp(longitude=lon_centers
                                         ,latitude=lat_centers)
landsea_mask_regrid = landsea_mask_regrid.rename({'latitude': 'lat'})
landsea_mask_regrid = landsea_mask_regrid.rename({'longitude': 'lon'})

# surface-type selectors, in the order of the `surface_type` coordinate:
# index 0 = ocean (mask value exactly 100), index 1 = land (mask value below 100)
surface_masks = (landsea_mask_regrid == 100, landsea_mask_regrid < 100)

# layer weights and scale factors do not depend on the file, so compute once
delta_pl=400
delta_pb=100
wb=(delta_pb/delta_pl)*np.log((delta_pl+delta_pb)/delta_pb)
wl=1-wb
cape_coef = (9.81/(340*3)) * wb
subsat_coef = (9.81/(340*3))* wl
cape_rescale = ((340*3)/9.81/wb)
subsat_rescale = ((340*3)/9.81/wl)


def _joint_histogram(x, y, x_edges, y_edges):
    """Count paired samples in 2-D bins that are half-open ``[lo, hi)`` on both axes.

    Parameters
    ----------
    x, y : array-like
        Paired samples of equal size; element ``k`` of ``x`` belongs with
        element ``k`` of ``y``.
    x_edges, y_edges : 1-D arrays
        Monotonically increasing bin edges.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(x_edges)-1, len(y_edges)-1)`` with counts.

    NaNs and pairs with either value outside its edge range are dropped.
    Values equal to the last edge are dropped as well: ``np.histogram2d``
    would otherwise put them in the last bin, which the explicit
    ``>= lo`` / ``< hi`` binning used here previously did not.
    """
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()
    keep = ((x >= x_edges[0]) & (x < x_edges[-1])
            & (y >= y_edges[0]) & (y < y_edges[-1]))
    counts, _, _ = np.histogram2d(x[keep], y[keep], bins=[x_edges, y_edges])
    return counts


for y,year in enumerate(year_list):

    print('processing year: {}'.format(year))

    for m,month in enumerate(range(1,13)):

        files = sorted(BL_dir.glob('era5_2layers_thetae_{}_{}_*.nc'.format(year,str(month).zfill(2))))

        for file in files: # files listed by day

            try:
                # the context manager releases the file handle once the values are read
                with xr.open_dataset(file) as ds:
                    data = ds.sel(lat=slice(-30,30)) # a single file of instantaneous BL measure
                    # read thetae components
                    thetae_bl = data.thetae_bl
                    thetae_sat_lt = data.thetae_sat_lt
                    thetae_lt = data.thetae_lt

                    # calculate buoyancy estimate
                    Buoy_CAPE = cape_coef * ((thetae_bl-thetae_sat_lt)/thetae_sat_lt) * 340
                    Buoy_SUBSAT = subsat_coef * ((thetae_sat_lt-thetae_lt)/thetae_sat_lt) * 340

                    # sample on the coarse grid and undo the weight/gravity scaling
                    Buoy_CAPE_regrid = cape_rescale*Buoy_CAPE.interp(lat=lat_centers
                                                     , lon=lon_centers) # deg. K
                    Buoy_SUBSAT_regrid = subsat_rescale*Buoy_SUBSAT.interp(lat=lat_centers
                                                     , lon=lon_centers) # deg. K

                    # joint histograms for ocean, then land
                    file_counts = [
                        _joint_histogram(Buoy_CAPE_regrid.where(surface_mask).values,
                                         Buoy_SUBSAT_regrid.where(surface_mask).values,
                                         BL_cape_bins, BL_subsat_bins)
                        for surface_mask in surface_masks
                    ]

            except Exception as err:
                # best effort: report the unreadable/invalid file and move on;
                # KeyboardInterrupt is no longer swallowed
                print('error file : {} ({}: {})'.format(file.name, type(err).__name__, err))
                continue

            # only add counts once the whole file was processed, so a failure
            # cannot leave a half-counted file in `samples`
            for s, counts in enumerate(file_counts):
                samples[y,m,s] += counts

# writeout as xarray dataset
# bin coordinates hold the left (lower) edge of each bin
ds_BL_climat = xr.Dataset(data_vars=dict(samples=(['year','month','surface_type','BL_cape_bins','BL_subsat_bins'],samples)),
                          coords=dict(year=(['year'], year_list),
                                      surface_type=(['surface_type'], ['ocean','land']),
                                      month=(['month'],np.arange(1,13)),
                                      BL_subsat_bins=(['BL_subsat_bins'], BL_subsat_bins[:-1]),
                                      BL_cape_bins=(['BL_cape_bins'],BL_cape_bins[:-1])))

processing year: 2002
processing year: 2003
processing year: 2004
processing year: 2005
processing year: 2006
processing year: 2007
processing year: 2008
processing year: 2009
CPU times: user 27min 2s, sys: 1min 23s, total: 28min 25s
Wall time: 48min 36s


In [13]:
# save derived dataset
out_dir = Path('/scratch/wmtsai/temp_mcs/output_stats')
# make sure the target directory exists before writing
out_dir.mkdir(parents=True, exist_ok=True)

# NOTE(review): the file name advertises 2002-2014, but the dataset only holds
# the years in its `year` coordinate (2002-2009 for the current year_list).
# The name is kept so existing readers still find the file; the real coverage
# is stored as a global attribute. Rename the file or extend year_list once
# the intended range is confirmed.
years_covered = '{}-{}'.format(int(ds_BL_climat.year.min()), int(ds_BL_climat.year.max()))
ds_BL_climat.assign_attrs(years_covered=years_covered).to_netcdf(
    out_dir / 'era5_BL_3deg_2Dhistogram.landsea.2002-2014.nc')

#### initial phase of MCSs

In [None]:
# %%time
# # extract BL at the Initial phase
# year_list = np.arange(2010,2020)

# lon_bins = np.arange(0,365,5)
# lat_bins = np.arange(-30,35,5)
# buoy_tot_map = np.zeros((4,12,len(lat_bins)-1,len(lon_bins)-1)) # (duration_type, month, lat, lon)
# buoy_cape_map = np.copy(buoy_tot_map) # (duration_type, month, lat, lon)
# buoy_subsat_map = np.copy(buoy_tot_map) # (duration_type, month, lat, lon)
# samples_map = np.copy(buoy_tot_map) # (duration_type, month, lat, lon)

# for year in year_list:
    
#     print('processing year: {}'.format(year))
    
#     data_env_dir =Path('/neelin2020/mcs_flextrkr/mcs_stats/envs_track/{}/tropics'.format(year))
#     data_track_dir = Path('/neelin2020/mcs_flextrkr/mcs_stats/mcs_tracks_non2mcs')
    
#     # load track data 
#     data_track = xr.open_dataset(data_track_dir / 'mcs_tracks_non2mcs_{}.tropics30NS.nc'.format(year))
    
#     # separate by MCS duration: S, M, L, U
#     mcs_duration = data_track.mcs_duration.values
#     idx_SL = np.where(mcs_duration <= 6)[0]
#     idx_ML = np.where(np.logical_and(mcs_duration > 6, mcs_duration < 12))[0]
#     idx_LL = np.where(np.logical_and(mcs_duration >= 12, mcs_duration < 18))[0]
#     idx_UL = np.where(np.logical_and(mcs_duration >= 18, mcs_duration < 24))[0]
    
#     for n,idx in enumerate([idx_SL, idx_ML, idx_LL, idx_UL]):
        
#         tracks = data_track.isel(tracks=idx).tracks # get corresponding track number
        
#         for track in tracks.values:
        
#             data_sub = xr.open_dataset(data_env_dir / 'mcs_era5_3D_envs_{}.{}.LD.nc'.format(year,str(track).zfill(5)))
#             # mean BL at initial phase
#             BL_tot_init = 340*3/9.81*data_sub.sel(mcs_phase='Init').Buoy_TOT
#             BL_tot_5deg = BL_tot_init.sel(x=slice(10,30),y=slice(10,30)).mean(('x','y')).values
#             BL_cape_init = 340*3/9.81*data_sub.sel(mcs_phase='Init').Buoy_CAPE
#             BL_cape_5deg = BL_cape_init.sel(x=slice(10,30),y=slice(10,30)).mean(('x','y')).values
#             BL_subsat_init = 340*3/9.81*data_sub.sel(mcs_phase='Init').Buoy_SUBSAT
#             BL_subsat_5deg = BL_subsat_init.sel(x=slice(10,30),y=slice(10,30)).mean(('x','y')).values
            
#             # geoinfo at initial phase
#             track_sub = data_track.sel(tracks=track)
#             idt_init = track_sub.idt_mcs_init.values
#             meanlon = track_sub.sel(times=idt_init).meanlon.values
#             meanlat = track_sub.sel(times=idt_init).meanlat.values
#             month = pd.to_datetime(track_sub.sel(times=idt_init).base_time.values).month
            
#             # convert lon
#             if meanlon < 0:
#                 meanlon = meanlon + 360
            
#             idx_lon = np.argmin(abs(meanlon-(lon_bins[:-1]+2.5)))
#             idx_lat = np.argmin(abs(meanlat-(lat_bins[:-1]+2.5)))
            
#             buoy_tot_map[n,month-1,idx_lat,idx_lon] += BL_tot_5deg
#             buoy_cape_map[n,month-1,idx_lat,idx_lon] += BL_cape_5deg
#             buoy_subsat_map[n,month-1,idx_lat,idx_lon] += BL_subsat_5deg
#             samples_map[n,month-1,idx_lat,idx_lon] += 1
            

In [None]:
# # write as xarray dataset
# out_dir = Path('/neelin2020/mcs_flextrkr/mcs_stats/output_stats')
# BL_TOT_MCSint = xr.Dataset(data_vars = dict(buoy_tot_int = (['duration_type','month','lat','lon'], buoy_tot_map),
#                                             buoy_cape_int = (['duration_type','month','lat','lon'], buoy_cape_map),
#                                             buoy_subsat_int = (['duration_type','month','lat','lon'], buoy_subsat_map),
#                                           samples = (['duration_type','month','lat','lon'], samples_map)),
#                          coords = dict(duration_type = (['duration_type'], ['SL','ML','LL','UL']),
#                                        month = (['month'], np.arange(1,13)),
#                                        lat = (['lat'], lat_bins[:-1] + 2.5),
#                                        lon = (['lon'], lon_bins[:-1] + 2.5))
#                         )

# # save dataset
# BL_TOT_MCSint.to_netcdf(out_dir / 'era5_BL_measures_MCSint_2010-2019_5deg_4DType.nc')

#### end phase of MCSs

In [None]:
# %%time
# # extract BL at the End phase
# year_list = np.arange(2010,2020)

# lon_bins = np.arange(0,365,5)
# lat_bins = np.arange(-30,35,5)
# buoy_tot_map = np.zeros((4,12,len(lat_bins)-1,len(lon_bins)-1)) # (duration_type, month, lat, lon)
# buoy_cape_map = np.copy(buoy_tot_map) # (duration_type, month, lat, lon)
# buoy_subsat_map = np.copy(buoy_tot_map) # (duration_type, month, lat, lon)
# samples_map = np.copy(buoy_tot_map) # (duration_type, month, lat, lon)

# for year in year_list:
    
#     print('processing year: {}'.format(year))
    
#     data_env_dir =Path('/neelin2020/mcs_flextrkr/mcs_stats/envs_track/{}/tropics'.format(year))
#     data_track_dir = Path('/neelin2020/mcs_flextrkr/mcs_stats/mcs_tracks_non2mcs')
    
#     # load track data 
#     data_track = xr.open_dataset(data_track_dir / 'mcs_tracks_non2mcs_{}.tropics30NS.nc'.format(year))
    
#     # separate by MCS duration: S, M, L, U
#     mcs_duration = data_track.mcs_duration.values
#     idx_SL = np.where(mcs_duration <= 6)[0]
#     idx_ML = np.where(np.logical_and(mcs_duration > 6, mcs_duration < 12))[0]
#     idx_LL = np.where(np.logical_and(mcs_duration >= 12, mcs_duration < 18))[0]
#     idx_UL = np.where(np.logical_and(mcs_duration >= 18, mcs_duration < 24))[0]
    
#     for n,idx in enumerate([idx_SL, idx_ML, idx_LL, idx_UL]):
        
#         tracks = data_track.isel(tracks=idx).tracks # get corresponding track number
        
#         for track in tracks.values:
        
#             data_sub = xr.open_dataset(data_env_dir / 'mcs_era5_3D_envs_{}.{}.LD.nc'.format(year,str(track).zfill(5)))
#             # mean BL at end phase
#             BL_tot_end = 340*3/9.81*data_sub.sel(mcs_phase='End').Buoy_TOT
#             BL_tot_5deg = BL_tot_end.sel(x=slice(10,30),y=slice(10,30)).mean(('x','y')).values
#             BL_cape_end = 340*3/9.81*data_sub.sel(mcs_phase='End').Buoy_CAPE
#             BL_cape_5deg = BL_cape_end.sel(x=slice(10,30),y=slice(10,30)).mean(('x','y')).values
#             BL_subsat_end = 340*3/9.81*data_sub.sel(mcs_phase='End').Buoy_SUBSAT
#             BL_subsat_5deg = BL_subsat_end.sel(x=slice(10,30),y=slice(10,30)).mean(('x','y')).values
            
#             # geoinfo at end phase
#             track_sub = data_track.sel(tracks=track)
#             idt_end = track_sub.idt_mcs_end.values
#             meanlon = track_sub.sel(times=idt_end).meanlon.values
#             meanlat = track_sub.sel(times=idt_end).meanlat.values
#             month = pd.to_datetime(track_sub.sel(times=idt_end).base_time.values).month
            
#             # convert lon
#             if meanlon < 0:
#                 meanlon = meanlon + 360
            
#             idx_lon = np.argmin(abs(meanlon-(lon_bins[:-1]+2.5)))
#             idx_lat = np.argmin(abs(meanlat-(lat_bins[:-1]+2.5)))
            
#             buoy_tot_map[n,month-1,idx_lat,idx_lon] += BL_tot_5deg
#             buoy_cape_map[n,month-1,idx_lat,idx_lon] += BL_cape_5deg
#             buoy_subsat_map[n,month-1,idx_lat,idx_lon] += BL_subsat_5deg
#             samples_map[n,month-1,idx_lat,idx_lon] += 1

In [None]:
# # write as xarray dataset
# out_dir = Path('/neelin2020/mcs_flextrkr/mcs_stats/output_stats')
# BL_TOT_MCSend = xr.Dataset(data_vars = dict(buoy_tot_int = (['duration_type','month','lat','lon'], buoy_tot_map),
#                                             buoy_cape_int = (['duration_type','month','lat','lon'], buoy_cape_map),
#                                             buoy_subsat_int = (['duration_type','month','lat','lon'], buoy_subsat_map),
#                                           samples = (['duration_type','month','lat','lon'], samples_map)),
#                          coords = dict(duration_type = (['duration_type'], ['SL','ML','LL','UL']),
#                                        month = (['month'], np.arange(1,13)),
#                                        lat = (['lat'], lat_bins[:-1] + 2.5),
#                                        lon = (['lon'], lon_bins[:-1] + 2.5))
#                         )

# # save dataset
# BL_TOT_MCSend.to_netcdf(out_dir / 'era5_BL_measures_MCSend_2010-2019_5deg_4DType.nc')

In [None]:
# %%time
# # get climatology of cwv
# cwv_dir = Path('/neelin2020/ERA-5_cwv_That')

# year_list = np.arange(2002,2015)
# ds_cwv_yr = []

# for year in year_list:
    
#     print('processing year: {}'.format(year))
#     ds_cwv = []

#     for month in range(1,13):
        
#         files = sorted(list(cwv_dir.glob('era5_cwv_that_{}_{}_*.nc'.format(year,str(month).zfill(2)))))
#         data = xr.open_mfdataset(files)

#         # read cwv (surface to 200 hPa)
#         cwv_monthly = data.cwv.mean('time').compute()
#         ds_cwv.append(cwv_monthly)
    
#     # climatology of monthly CWV
#     ds_cwv_yr.append(xr.concat(ds_cwv, dim=pd.Index(np.arange(1,13), name='month')))
    
# ds_cwv_climat = xr.concat(ds_cwv_yr, dim=pd.Index(year_list, name='year'))

# # save derived dataset
# out_dir = Path('/neelin2020/mcs_flextrkr/mcs_stats/output_stats')
# ds_cwv_climat.mean('year').to_netcdf(out_dir / 'era5_cwv_climatology_2002-2014.nc')

In [None]:
# xr.open_dataset(out_dir / 'era5_BL_measures_monthly_avg.2002-2014.nc').sel(year=2002,month=1)