In [1]:
import os
import sys
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from datetime import datetime
from pathlib import Path

import cartopy.crs as ccrs
import cartopy.feature as cfeat
from cartopy.util import add_cyclic_point
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

import warnings

In [2]:
warnings.filterwarnings('ignore')

In [3]:
def get_files_HCC(year, corr_temp_cri, corr_space_cri):
    """
    List the per-track environment files of the tracks whose correlation
    coefficients stored in the yearly featstats file exceed both thresholds.

    Reads two notebook-level globals: `featstats_dir` (directory of the yearly
    featstats file) and `dir_envs_track` (directory searched for per-track files).

    input:
        year: year inserted into the featstats and per-track file names
        corr_temp_cri: exclusive lower bound on `corr_coeff_temp`
        corr_space_cri: exclusive lower bound on `corr_coeff_space` averaged over 'mcs_phase'
    return: list of Path objects, one for each selected track that has a file on disk
    """

    featstats_file = featstats_dir / 'featstats_tracks_non2mcs_{}.tropics30NS.extend.nc'.format(year)

    # the context manager closes the file handle; this function is called for
    # every year of every duration group, so unclosed handles accumulate
    with xr.open_dataset(featstats_file) as data:
        corr_coeff_temp = data.corr_coeff_temp
        corr_coeff_space = data.corr_coeff_space.mean('mcs_phase')

        # a NaN coefficient compares False, so such a track is not selected
        cond_1 = corr_coeff_temp > corr_temp_cri
        cond_2 = corr_coeff_space > corr_space_cri
        # pull the selected track ids into memory before the file is closed
        track_ids = data.isel(tracks=np.where(cond_1 & cond_2)[0]).tracks.values

    files_HCC = []
    for track in track_ids:
        # the pattern contains no wildcard, so glob() acts as an existence check
        file_pattern = 'mcs_era5_3D_envs_{}.{}.LD.nc'.format(year, str(track).zfill(5))
        files_HCC.extend(sorted(dir_envs_track.glob(file_pattern)))
    return files_HCC

In [4]:
def get_files_duration(year, duration_min, duration_max):
    """
    List the per-track environment files of the tracks whose `mcs_duration`
    falls in the half-open interval [duration_min, duration_max).

    Reads two notebook-level globals: `featstats_dir` (directory of the yearly
    featstats file) and `dir_envs_track` (directory searched for per-track files).

    input:
        year: year inserted into the featstats and per-track file names
        duration_min: inclusive lower bound on `mcs_duration`
        duration_max: exclusive upper bound on `mcs_duration`
    return: list of Path objects, one for each selected track that has a file on disk
    """

    featstats_file = featstats_dir / 'featstats_tracks_non2mcs_{}.tropics30NS.extend.nc'.format(year)

    # the context manager closes the file handle; this function is called for
    # every year of every duration group, so unclosed handles accumulate
    with xr.open_dataset(featstats_file) as data:
        mcs_duration = data.mcs_duration

        cond_1 = mcs_duration >= duration_min
        cond_2 = mcs_duration < duration_max
        # pull the selected track ids into memory before the file is closed
        track_ids = data.isel(tracks=np.where(cond_1 & cond_2)[0]).tracks.values

    files_sel = []
    for track in track_ids:
        # the pattern contains no wildcard, so glob() acts as an existence check
        file_pattern = 'mcs_era5_3D_envs_{}.{}.LD.nc'.format(year, str(track).zfill(5))
        files_sel.extend(sorted(dir_envs_track.glob(file_pattern)))
    return files_sel

In [5]:
def get_files_landsea(year, sampling_opt='all'):

    """
    List the per-track environment files of MCS tracks filtered by the
    land/sea flag at the time index given by `idt_mcs_init`.

    Reads two notebook-level globals: `mcs_dir` (directory of the yearly
    mcs_tracks file) and `dir_envs_track` (directory searched for per-track files).

    input:
        year: year inserted into the mcs_tracks and per-track file names
        sampling_opt: 'all' keeps every track, 'land' keeps landsea_flag == 1,
                      'ocean' keeps landsea_flag == 0
    return: list of Path objects, one for each selected track that has a file on disk
    raises: ValueError if sampling_opt is not 'all', 'ocean' or 'land'
    """

    # fail early with a clear message; an unknown option previously surfaced
    # as an UnboundLocalError further down
    if sampling_opt not in ('all', 'land', 'ocean'):
        raise ValueError("sampling_opt must be 'all', 'ocean' or 'land', got {!r}".format(sampling_opt))

    # the context manager closes the file handle once the track ids are in memory
    with xr.open_dataset(mcs_dir / 'mcs_tracks_non2mcs_{}.tropics30NS.extend.nc'.format(year)) as data:
        if sampling_opt == 'all':
            track_ids = data.tracks.values
        else:
            idt_mcs_init = data.idt_mcs_init
            # flag of each track taken at its own idt_mcs_init time index
            landsea_flag = data.landsea_flag.sel(times=idt_mcs_init)
            flag_wanted = 1 if sampling_opt == 'land' else 0
            idx_sel = np.where(landsea_flag == flag_wanted)[0]
            track_ids = data.isel(tracks=idx_sel).tracks.values

    files_sel = []
    for track in track_ids:
        # the pattern contains no wildcard, so glob() acts as an existence check
        file_pattern = 'mcs_era5_3D_envs_{}.{}.LD.nc'.format(year, str(track).zfill(5))
        files_sel.extend(sorted(dir_envs_track.glob(file_pattern)))
    return files_sel

In [6]:
def vars_mcs_env(fid_envs_track):
    """
    input: processed envs_track file containing MCS feature mask and 2D/3D variables on ERA-5 coordinates
    return: dataset of ('x','y') averages of every variable in the file, stacked
            along a new 'area_type' dimension with four entries:
              'mcs_mean'    - grid points where cloudtracknumber_nomergesplit > 0
              'nonmcs_mean' - grid points where cloudtracknumber_nomergesplit == 0
              '5deg_mean'   - box x=10..30, y=10..30
              '3deg_mean'   - box x=14..26, y=14..26
            The result is loaded into memory, so it stays valid after the file is closed.
    """

    # this function runs once per track file (tens of thousands of files), so
    # the handle is closed explicitly instead of being left to the garbage collector
    with xr.open_dataset(fid_envs_track) as data:
        mcs_mask = data.cloudtracknumber_nomergesplit # binary mask

        # where() turns the unselected grid points into NaN, which mean() then skips
        data_mcs = data.where(mcs_mask > 0).mean(('x','y'))
        data_env = data.where(mcs_mask == 0).mean(('x','y'))

        # fixed boxes; the '5deg'/'3deg' labels presumably follow from the grid
        # spacing of the file - TODO confirm
        data_5deg = data.sel(x=slice(10,30),y=slice(10,30)).mean(('x','y'))
        data_3deg = data.sel(x=slice(14,26),y=slice(14,26)).mean(('x','y'))

        area_index = pd.Index(['mcs_mean','nonmcs_mean','5deg_mean','3deg_mean'],name='area_type')
        data_merged = xr.concat([data_mcs, data_env, data_5deg, data_3deg], area_index)
        # make sure nothing still points at the file when it is closed
        data_merged = data_merged.load()

    return data_merged

In [9]:
%%time
# For each MCS duration group, collect the per-track environment files of all
# years, reduce each file to its area means and stack them along 'total_tracks'.

year_list = np.arange(2001,2021)

########  parameters for filtering MCS tracks  ########
corr_temp_cri = -999. # temporal correlation between the mean values of ERA-5 and GPM precip during the evolution
corr_space_cri = -999. # mean spatial correlation between ERA-5 and GPM precip. 2-D maps during the evolution
sampling_opt = 'land' # MCS geolocation: 'all','ocean','land'
#######################################################

print('corre_temp_cri: {}'.format(corr_temp_cri))
print('corre_space_cri: {}'.format(corr_space_cri))
print('sampling_opt: {}'.format(sampling_opt))

mcs_dir = Path('/scratch/wmtsai/temp_mcs/mcs_stats/mcs_tracks_non2mcs/')
featstats_dir = Path('/scratch/wmtsai/temp_mcs/mcs_stats/mcs_tracks_non2mcs/tracks_area_mean/')

# (duration_min, duration_max, label) in hours; each group selects min <= duration < max
duration_groups = [(5, 6, 'SL'), (6, 12, 'ML'), (12, 18, 'LL'), (18, 24, 'UL'), (24, 200, 'UUL')]

vars_dtype_merged = []
for dmin, dmax, duration_type in duration_groups:

    files_multiyr = []
    for year in year_list:

        # the get_files_* helpers read this directory from the notebook namespace
        dir_envs_track = Path('/scratch/wmtsai/temp_mcs/mcs_stats/envs_track/{}/tropics_extend/'.format(year))

        # three independent selections for this year:
        #   coherency between ERA-5 and GPM precipitation,
        #   genesis location ('all','ocean','land'),
        #   MCS duration group
        files_HCC = get_files_HCC(year, corr_temp_cri, corr_space_cri)
        files_geoloc = get_files_landsea(year, sampling_opt)
        files_duration = get_files_duration(year, duration_min=dmin, duration_max=dmax)

        # keep only the files present in all three selections
        # NOTE(review): the resulting order is set-iteration order, not sorted
        files_tmp = list(set(files_HCC).intersection(files_duration))
        files_comb = list(set(files_tmp).intersection(files_geoloc))

        files_multiyr.extend(files_comb)

    print('number of selected tracks during {}-{}: {}'.format(year_list[0],year_list[-1],len(files_multiyr)))

    # area means of every selected track file
    data_tracks = [vars_mcs_env(file) for file in files_multiyr]

    track_index = pd.Index(np.arange(len(files_multiyr)), name='total_tracks')
    group_attrs = {
        'description': 'MCSs during {}-{}'.format(year_list[0], year_list[-1]),
        'surface_type': '{}'.format(sampling_opt),
        'duration_min': '{} hours'.format(dmin),
        'duration_max': '{} hours'.format(dmax),
    }
    data_tracks_xr = xr.concat(data_tracks, track_index).assign_attrs(**group_attrs)

    # placeholder for all duration types
    vars_dtype_merged.append(data_tracks_xr)

corre_temp_cri: -999.0
corre_space_cri: -999.0
sampling_opt: land
number of selected tracks during 2001-2020: 1623
number of selected tracks during 2001-2020: 20085
number of selected tracks during 2001-2020: 11427
number of selected tracks during 2001-2020: 4764
number of selected tracks during 2001-2020: 4567
CPU times: user 59min 14s, sys: 9min 13s, total: 1h 8min 27s
Wall time: 1h 10min 17s


In [10]:
# Write one netCDF file per duration group from the datasets collected in vars_dtype_merged.
out_dir = Path('/scratch/wmtsai/temp_mcs/output_stats/vars_env_durations')
# create the target directory if needed so the write cannot fail on a missing path
out_dir.mkdir(parents=True, exist_ok=True)

# labels are matched to vars_dtype_merged by position, so both must have the
# same length and order; otherwise files would be written under the wrong label
duration_labels = ['SL','ML','LL','UL','UUL']
if len(vars_dtype_merged) != len(duration_labels):
    raise RuntimeError('vars_dtype_merged holds {} datasets but {} duration labels are expected; '
                       're-run the cell that fills it'.format(len(vars_dtype_merged), len(duration_labels)))

for n, dtype_string in enumerate(duration_labels):

    data = vars_dtype_merged[n]
    data.to_netcdf(out_dir / 'envs_MCS_phase_duration.{}.{}.{}.{}.nc'.format(dtype_string,sampling_opt,
                                                                          year_list[0], year_list[-1]))

#### extract featstats datasets

In [12]:
%%time
# For each MCS duration group, extract the featstats record of every selected
# track of every year and write the stacked result to one netCDF file.

year_list = np.arange(2001,2021)

########  parameters for filtering MCS tracks  ########
corr_temp_cri = -999. # temporal correlation between the mean values of ERA-5 and GPM precip during the evolution
corr_space_cri = -999. # mean spatial correlation between ERA-5 and GPM precip. 2-D maps during the evolution
sampling_opt = 'land' # MCS geolocation: 'all','ocean','land'
#######################################################

print('corre_temp_cri: {}'.format(corr_temp_cri))
print('corre_space_cri: {}'.format(corr_space_cri))
print('sampling_opt: {}'.format(sampling_opt))

mcs_dir = Path('/scratch/wmtsai/temp_mcs/mcs_stats/mcs_tracks_non2mcs/')
featstats_dir = Path('/scratch/wmtsai/temp_mcs/mcs_stats/mcs_tracks_non2mcs/tracks_area_mean/')

out_dir = Path('/scratch/wmtsai/temp_mcs/output_stats/vars_env_durations')
# create the target directory up front so the write at the end of each group
# cannot fail on a missing path
out_dir.mkdir(parents=True, exist_ok=True)

# kept from the original cell: nothing is appended in this cell, but the
# assignment replaces whatever list an earlier cell stored under this name
vars_dtype_merged = []
for (dmin, dmax, duration_type) in zip([5,6,12,18,24],[6,12,18,24,200],
                                       ['SL','ML','LL','UL','UUL']):

    data_tracks_multiyr = []
    for year in year_list:

        # the get_files_* helpers read this directory from the notebook namespace
        dir_envs_track = Path('/scratch/wmtsai/temp_mcs/mcs_stats/envs_track/{}/tropics_extend'.format(year))
        # selecting files
        # 1. filtered by spatial-temporal precipitation coherency between ERA-5 and GPM
        files_HCC = get_files_HCC(year, corr_temp_cri, corr_space_cri)
        # 2. filtered by genesis location: 'all','ocean','land'
        files_geoloc = get_files_landsea(year, sampling_opt)
        # 3. grouping by MCS duration
        files_duration = get_files_duration(year, duration_min=dmin, duration_max=dmax)

        files_tmp = list(set(files_HCC).intersection(files_duration))
        files_comb = list(set(files_tmp).intersection(files_geoloc))

        # selected track ids of this year, in the order of files_comb.
        # File names look like 'mcs_era5_3D_envs_<year>.<track>.LD.nc'; the id is
        # read as the dot-separated field before 'LD.nc' so that ids with more
        # than 5 digits are not truncated (a fixed-width slice would drop digits)
        track_list = [int(file.name.split('.')[-3]) for file in files_comb]

        featstats_file = featstats_dir / 'featstats_tracks_non2mcs_{}.tropics30NS.extend.nc'.format(year)
        # compute() loads the selection, so the file can be closed right away
        with xr.open_dataset(featstats_file) as data_feat_track:
            data_feat_sel = data_feat_track.sel(tracks=track_list).compute()

        for track in data_feat_sel.tracks.values:
            # drop the scalar 'tracks' coordinate; tracks are re-indexed by 'total_tracks' below
            data_out = data_feat_sel.sel(tracks=track).drop_vars('tracks') #(rel_times)
            data_tracks_multiyr.append(data_out)

    data_tracks_xr = xr.concat(data_tracks_multiyr, pd.Index(np.arange(len(data_tracks_multiyr)), name='total_tracks'))
    data_tracks_xr = data_tracks_xr.assign_attrs(description='MCSs during {}-{}'.format(year_list[0], year_list[-1]),
                                                 surface_type='{}'.format(sampling_opt),
                                                 duration_min= '{} hours'.format(dmin),
                                                 duration_max= '{} hours'.format(dmax))

    print('number of selected tracks during {}-{}: {}'.format(year_list[0],year_list[-1],len(data_tracks_multiyr)))

    data_tracks_xr.to_netcdf(out_dir / 'featstats_MCS_phase_duration.{}.{}.{}.{}.nc'.format(duration_type,sampling_opt,
                                                                          year_list[0], year_list[-1]))

corre_temp_cri: -999.0
corre_space_cri: -999.0
sampling_opt: land
number of selected tracks during 2001-2020: 1623
number of selected tracks during 2001-2020: 20085
number of selected tracks during 2001-2020: 11427
number of selected tracks during 2001-2020: 4764
number of selected tracks during 2001-2020: 4567
CPU times: user 1min 59s, sys: 8.39 s, total: 2min 7s
Wall time: 2min 8s


#### extract non2MCS tracks from PNNL's datasets

In [None]:
%%time
# For each MCS duration group, sample the PNNL ERA-5 mean-environment record of
# every selected track at its six phase time indices and write the stacked
# result to one netCDF file.

year_list = np.arange(2001,2021)

########  parameters for filtering MCS tracks  ########
corr_temp_cri = -999 # temporal correlation between the mean values of ERA-5 and GPM precip during the evolution
corr_space_cri = -999 # mean spatial correlation between ERA-5 and GPM precip. 2-D maps during the evolution
sampling_opt = 'ocean' # MCS geolocation: 'all','ocean','land'
#######################################################

print('corre_temp_cri: {}'.format(corr_temp_cri))
print('corre_space_cri: {}'.format(corr_space_cri))
print('sampling_opt: {}'.format(sampling_opt))

mcs_dir = Path('/scratch/wmtsai/temp_mcs/mcs_stats/mcs_tracks_non2mcs/')
featstats_dir = Path('/scratch/wmtsai/temp_mcs/mcs_stats/mcs_tracks_non2mcs/tracks_area_mean/')
env_pnnl_dir = Path('/neelin2020/mcs_flextrkr/era5_envs/')

out_dir = Path('/scratch/wmtsai/temp_mcs/output_stats/vars_env_durations')
# create the target directory up front so the write at the end of each group
# cannot fail on a missing path
out_dir.mkdir(parents=True, exist_ok=True)

# variables of the mcs_tracks file holding the time index of each phase, listed
# in the same order as the phase labels assigned further down
phase_index_vars = ['idt_ccs_init', 'idt_mcs_init', 'idt_mcs_grow',
                    'idt_mcs_mature', 'idt_mcs_decay', 'idt_mcs_end']

# kept from the original cell: nothing is appended in this cell, but the
# assignment replaces whatever list an earlier cell stored under this name
vars_dtype_merged = []
for (dmin, dmax, duration_type) in zip([18,24],[24,200],
                                       ['UL','UUL']):

    data_tracks_multiyr = []
    for year in year_list:

        # the get_files_* helpers read this directory from the notebook namespace
        dir_envs_track = Path('/scratch/wmtsai/temp_mcs/mcs_stats/envs_track/{}/tropics_extend'.format(year))
        # selecting files
        # 1. filtered by spatial-temporal precipitation coherency between ERA-5 and GPM
        files_HCC = get_files_HCC(year, corr_temp_cri, corr_space_cri)
        # 2. filtered by genesis location: 'all','ocean','land'
        files_geoloc = get_files_landsea(year, sampling_opt)
        # 3. grouping by MCS duration
        files_duration = get_files_duration(year, duration_min=dmin, duration_max=dmax)

        files_tmp = list(set(files_HCC).intersection(files_duration))
        files_comb = list(set(files_tmp).intersection(files_geoloc))

        # selected track ids of this year, in the order of files_comb.
        # File names look like 'mcs_era5_3D_envs_<year>.<track>.LD.nc'; the id is
        # read as the dot-separated field before 'LD.nc' so that ids with more
        # than 5 digits are not truncated (a fixed-width slice would drop digits)
        track_list = [int(file.name.split('.')[-3]) for file in files_comb]

        env_file = env_pnnl_dir / 'mcs_era5_mean_envs_{}0101.0000_{}0101.0000.nc'.format(year,year+1)
        non2mcs_file = mcs_dir / 'mcs_tracks_non2mcs_{}.tropics30NS.extend.nc'.format(year)
        # compute() loads the selections, so both files can be closed right away
        with xr.open_dataset(env_file) as data_env_track, xr.open_dataset(non2mcs_file) as data_non2mcs:
            data_env_sel = data_env_track.sel(tracks=track_list).compute()
            data_non2mcs_sel = data_non2mcs.sel(tracks=track_list).compute()

        for track in data_env_sel.tracks.values:
            # drop the scalar 'tracks' coordinate; tracks are re-indexed by 'total_tracks' below
            data_out = data_env_sel.sel(tracks=track).drop_vars('tracks') #(rel_times)
            # select the track once instead of once per phase variable
            track_info = data_non2mcs_sel.sel(tracks=track)
            phase_list = [track_info[name].values for name in phase_index_vars]
            # get values at mcs phases
            data_phase = data_out.sel(rel_times=phase_list)
            # replace "rel_times" by mcs_phase corresponding to the common standard
            data_phase['rel_times'] = ['CCS','Init','Grow','Mature','Decay','End']
            data_phase = data_phase.rename({'rel_times':'mcs_phase'})
            data_tracks_multiyr.append(data_phase)

    data_tracks_xr = xr.concat(data_tracks_multiyr, pd.Index(np.arange(len(data_tracks_multiyr)), name='total_tracks'))
    data_tracks_xr = data_tracks_xr.assign_attrs(description='MCSs during {}-{}'.format(year_list[0], year_list[-1]),
                                                 surface_type='{}'.format(sampling_opt),
                                                 duration_min= '{} hours'.format(dmin),
                                                 duration_max= '{} hours'.format(dmax))

    print('number of selected tracks during {}-{}: {}'.format(year_list[0],year_list[-1],len(data_tracks_multiyr)))

    data_tracks_xr.to_netcdf(out_dir / 'envsPNNL_MCS_phase_duration.{}.{}.{}.{}.nc'.format(duration_type,sampling_opt,
                                                                          year_list[0], year_list[-1]))

#### Double check with PNNL ERA-5 6-deg. mean environmental variables
- confirm the correctness of env-MCS files ('phase','x','y','level') processed on Jupiter

In [None]:
# single 2002 track used for the consistency check in the following cells
mcs_id = 32204
# NOTE(review): vars_list is not used by any cell shown here
vars_list = ['cloudtracknumber_nomergesplit','mtpr','t','q']
# area means ('mcs_mean','nonmcs_mean','5deg_mean','3deg_mean') of this track's envs file
data_merged = vars_mcs_env('/scratch/wmtsai/temp_mcs/mcs_stats/envs_track/2002/tropics_extend/mcs_era5_3D_envs_2002.{}.LD.nc'.format(mcs_id))

In [None]:
# keep only the fixed-box mean labelled '3deg_mean'
data_3deg = data_merged.sel(area_type='3deg_mean')

In [None]:
# 2002 track file, used to look up the mature-phase time index of the checked track
tmp2 = xr.open_dataset('/scratch/wmtsai/temp_mcs/mcs_stats/mcs_tracks_non2mcs/mcs_tracks_non2mcs_2002.tropics30NS.extend.nc')
idt_mcs_mature = tmp2.sel(tracks=mcs_id).idt_mcs_mature.values

In [None]:
# PNNL ERA-5 mean-environment file for 2002, reduced to the checked track
data_pnnl = xr.open_dataset('/neelin2020/mcs_flextrkr/era5_envs/mcs_era5_mean_envs_20020101.0000_20030101.0000.nc')
data_6degPNNL = data_pnnl.sel(tracks=mcs_id)

In [None]:
# overlay the two mature-phase profiles of q / Q against 'level' on one axis
fig,ax = plt.subplots(1,1,figsize=(4,4.5))
data_3deg.q.sel(mcs_phase='Mature').plot(ax=ax, y='level')
data_6degPNNL.Q.sel(rel_times=idt_mcs_mature).plot(ax=ax,y='level')
# reversed limits put 1000 at the bottom of the axis and 100 at the top
plt.ylim([1000,100])

#### check nonmcs track samples

In [None]:
# one non-MCS track sample from 2010, inspected in the next cell
data = xr.open_dataset('/scratch/wmtsai/temp_mcs/mcs_stats/envs_track/2010/nonmcs_extend/nonmcs_era5_3D_envs_2010.0085022.LD.nc')
# disabled selection logic left in by the author:
# maxtime = data.area.argmax('times')
# idx_5hr = np.where(data.track_duration == 5)[0]
# idx_areamax3hr = np.where(maxtime == 2)[0]
# idx_com = np.intersect1d(idx_5hr, idx_areamax3hr)

In [None]:
# build a 0/1 mask from tb at the 'Mature' phase:
# first set every point that is not < 241 to 0 (NaN points included) ...
tmp = data.sel(mcs_phase='Mature').tb
tmp = tmp.where(tmp < 241, 0)
# ... then set every remaining non-zero point (tb < 241) to 1
tmp = tmp.where(tmp ==0, 1)
# outline of the mask drawn on top of the Buoy_TOT field
tmp.plot.contour(levels=[0,1], zorder=2)
data.Buoy_TOT.sel(mcs_phase='Mature').plot(zorder=1, cmap='jet', vmax=0.05, vmin=-0.15)
# red contour marks where Buoy_TOT equals 0
data.Buoy_TOT.sel(mcs_phase='Mature').plot.contour(zorder=1, levels=[0], colors=['r'])

In [None]:
# NOTE(review): looks like two layer thicknesses (presumably in hPa) and the
# matching pair of layer weights - confirm against the source formulation
delta_pl=1000-100-500
delta_pb=100
# wb = (delta_pb/delta_pl) * ln((delta_pl+delta_pb)/delta_pb), about 0.402 for 400 and 100
wb=(delta_pb/delta_pl)*np.log((delta_pl+delta_pb)/delta_pb)
# the two weights sum to 1
wl=1-wb

In [None]:
# display the value of wb computed in the previous cell
wb

In [None]:
# display the value of wl (= 1 - wb)
wl