In [None]:
import os
import sys
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from datetime import datetime
from pathlib import Path
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib.patches import Rectangle

import cartopy.crs as ccrs
import cartopy.feature as cfeat
from cartopy.util import add_cyclic_point
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

# stats
from scipy.stats import pearsonr

import warnings

In [None]:
# Suppress every Python warning for the rest of the session.
# NOTE: this also hides deprecation and runtime (e.g. NaN comparison) warnings
# raised by the numerical code below.
warnings.filterwarnings('ignore')

In [None]:
# Font configuration for all matplotlib figures.
# NOTE(review): 'font.family' names the concrete family "Open Sans", while
# 'font.sans-serif' is set to "Arial"; the latter is presumably only consulted
# when the family is the generic 'sans-serif' — confirm which typeface is intended.
matplotlib.rcParams['font.family'] = "Open Sans"
matplotlib.rcParams['font.sans-serif'] = "Arial"

In [None]:
# Sample 10 colours across the full 'Blues' map and 30 colours from the upper
# half (0.5-1) of 'jet'.
colors1 = plt.cm.Blues(np.linspace(0., 1, 10))
colors2 = plt.cm.jet(np.linspace(0.5, 1, 30))

# Stack the two colour tables (Blues first, then jet) and interpolate them into
# a single colormap, used later by the 2-D histogram panels.
colors = np.vstack((colors1, colors2))
mixedmap = LinearSegmentedColormap.from_list('my_colormap', colors)
# Bare expression: displays the colormap as the cell output.
mixedmap

In [None]:
# NOTE: every line of this cell is commented out (disabled); it is kept for reference only.
# %%time
# phase_list = [data_tracks.idt_mcs_init,data_tracks.idt_mcs_grow, data_tracks.idt_mcs_mature,  
#               data_tracks.idt_mcs_decay,data_tracks.idt_mcs_end]

# data_track_phase = []
# for p,idt_phase in enumerate(phase_list):

#     tmp_list = []
#     for track in data_tracks.tracks:
#         tmp_list.append(data_tracks.isel(times=idt_phase).sel(tracks=track).drop('times'))
#     data_track_phase.append(xr.concat(tmp_list, pd.Index(data_tracks.tracks.values, name='tracks')))
# data_track_phase_xr = xr.concat(data_track_phase, pd.Index(["Init", "Grow", "Mature", "Decay", "End"], name='mcs_phase'))

In [None]:
def BL_mcs_2dmap(fid_envs_track):
    """
    Split the buoyancy fields of one envs_track file into MCS and environment parts.

    Parameters
    ----------
    fid_envs_track : str or path-like
        Processed envs_track file handed to ``xr.open_dataset``. It must provide
        ``BL_TOT``, ``BL_CAPE``, ``BL_SUBSAT`` and ``cloudtracknumber_nomergesplit``.

    Returns
    -------
    tuple of 6 DataArrays
        ``(BL_TOT, BL_CAPE, BL_SUBSAT)`` kept where the mask is > 0, followed by
        the same three fields kept where the mask is == 0. Grid points that do
        not satisfy the condition are NaN.
    """
    ds = xr.open_dataset(fid_envs_track)

    # NOTE(review): the original comment called this a binary mask — confirm.
    track_mask = ds.cloudtracknumber_nomergesplit
    inside_mcs = track_mask > 0
    outside_mcs = track_mask == 0

    components = (ds.BL_TOT, ds.BL_CAPE, ds.BL_SUBSAT)

    # Same three components, once restricted to the MCS and once to its surroundings.
    inside_fields = tuple(field.where(inside_mcs) for field in components)
    outside_fields = tuple(field.where(outside_mcs) for field in components)

    return inside_fields + outside_fields

In [None]:
def BL_fract_amp_condmn(files, tracks_ds=None):
    """
    Build per-phase 2-D histograms of positive-buoyancy fraction vs. amplitude.

    For every track file the function computes, inside the MCS mask
    (``cloudtracknumber_nomergesplit > 0``):

    * the fraction of the mask-summed area where ``Buoy_TOT > 0``;
    * the mean of ``Buoy_TOT`` over the points where it is positive.

    For each of the five MCS phases the (fraction, amplitude) bin that is hit
    gets one more sample, and the track's ``mcs_duration`` is added to the
    matching duration-sum bin.

    Parameters
    ----------
    files : iterable of pathlib.Path
        Track files; the track number is parsed from ``file.name[-8:-3]``.
    tracks_ds : xarray.Dataset, optional
        Dataset providing ``mcs_duration`` indexed by ``tracks``. When omitted,
        the notebook-level global ``data_tracks`` is used (original behaviour).

    Returns
    -------
    xarray.Dataset
        ``samples`` and ``mcs_duration_sum`` on ``(phase, bins_fract, bins_amp)``;
        the bin coordinates hold the left bin edges.
    """
    if tracks_ds is None:
        # Backward-compatible fallback to the notebook global.
        tracks_ds = data_tracks

    phase_names = ["Init", "Grow", "Mature", "Decay", "End"]

    # bin edges for BL+ fraction and BL+ amplitude (20 bins each)
    bins_fract = np.linspace(0,0.4,21)
    bins_amp = np.linspace(0,0.01,21)
    bins_samples = np.zeros((len(phase_names), len(bins_fract)-1, len(bins_amp)-1))
    bins_duration = np.copy(bins_samples)

    track_list = []
    for file in files:

        track_id = int(file.name[-8:-3]) # track number encoded in the file name
        track_list.append(track_id)

        # The context manager closes the file once the per-phase values have
        # been pulled into memory, so handles do not pile up over many tracks.
        with xr.open_dataset(file) as tmp:
            mcs_mask = tmp.cloudtracknumber_nomergesplit
            BL_mcs = tmp.Buoy_TOT.where(mcs_mask > 0)
            BL_pos = BL_mcs.where(BL_mcs > 0, 0)
            BL_pos_binary = BL_pos.where(BL_pos ==0, 1) # 1 where buoyancy is positive, else 0
            mcs_area = mcs_mask.sum(('x','y'))
            BL_pos_frac = BL_pos_binary.sum(('x','y'))/mcs_area
            BL_amp = BL_mcs.where(BL_mcs > 0).mean(('x','y'))

            fract_by_phase = [BL_pos_frac.sel(mcs_phase=phase).values for phase in phase_names]
            amp_by_phase = [BL_amp.sel(mcs_phase=phase).values for phase in phase_names]

        # duration of this track
        mcs_duration = tracks_ds.sel(tracks=track_id).mcs_duration.values

        for p in range(len(phase_names)):
            fract_1d = fract_by_phase[p]
            amp_1d = amp_by_phase[p]

            # True for every bin that contains at least one value. np.any works
            # for scalar (0-d) and 1-d inputs alike; NaN never matches a bin.
            fract_hit = np.array([np.any(np.logical_and(fract_1d >= lo, fract_1d < hi))
                                  for lo, hi in zip(bins_fract[:-1], bins_fract[1:])])
            amp_hit = np.array([np.any(np.logical_and(amp_1d >= lo, amp_1d < hi))
                                for lo, hi in zip(bins_amp[:-1], bins_amp[1:])])

            # A (fraction, amplitude) cell is counted when both of its bins are hit.
            hit = np.outer(fract_hit, amp_hit)
            bins_samples[p, hit] += 1
            bins_duration[p, hit] += mcs_duration

    # NOTE(review): the phase coordinate is labelled 'Initial' although the data
    # are selected with 'Init', and the description hard-codes 2014 — both are
    # kept unchanged so existing outputs stay comparable; confirm before renaming.
    ds_bins = xr.Dataset(data_vars = dict(samples = (['phase','bins_fract','bins_amp'], bins_samples),
                                          mcs_duration_sum = (['phase','bins_fract','bins_amp'], bins_duration)),
                 coords = dict(tracks = track_list,
                               phase = (['phase'], ['Initial', 'Grow', 'Mature', 'Decay', 'End']),
                               bins_fract = (['bins_fract'], bins_fract[:-1]),
                               bins_amp = (['bins_amp'], bins_amp[:-1])),
                 attrs = dict(description = 'Buoyacny fraction - amplitude histogram. 2014'))

    return ds_bins

In [None]:
def data_tracks_BL_features(files):
    """
    Compute per-track, per-phase buoyancy statistics inside the MCS mask.

    Parameters
    ----------
    files : iterable of pathlib.Path
        Track files; the track number is parsed from ``file.name[-8:-3]``.
        Each file must provide ``Buoy_TOT``, ``Buoy_CAPE``, ``Buoy_SUBSAT`` and
        ``cloudtracknumber_nomergesplit`` with horizontal dims ``x`` and ``y``.

    Returns
    -------
    xarray.Dataset
        Concatenated along a new ``tracks`` dimension, with variables

        * ``BL_positive_area``     - sum over x,y of the Buoy_TOT > 0 indicator (0 -> NaN)
        * ``BL_positive_fraction`` - that sum divided by the mask sum (0 -> NaN)
        * ``BL_TOT_bp``            - mean Buoy_TOT over the positive points only
        * ``BL_TOT_mcs``, ``BL_CAPE_mcs``, ``BL_SUBSAT_mcs`` - means over the mask
    """

    track_list = []
    BL_features_list = []

    for file in files:

        track_list.append(int(file.name[-8:-3])) # track number encoded in the file name

        # Close each file as soon as its reduced statistics are in memory;
        # otherwise one handle per track stays open for the whole loop.
        with xr.open_dataset(file) as tmp:
            in_mcs = tmp.cloudtracknumber_nomergesplit > 0

            # BL+ indicator, area, fraction and amplitude
            BL_mcs = tmp.Buoy_TOT.where(in_mcs)
            BL_pos = BL_mcs.where(BL_mcs > 0, 0)
            BL_pos_nan = BL_mcs.where(BL_mcs > 0, np.nan)
            BL_pos_binary = BL_pos.where(BL_pos ==0, 1) # 1 where buoyancy is positive, else 0
            mcs_area = tmp.cloudtracknumber_nomergesplit.sum(('x','y'))
            BL_pos_area = BL_pos_binary.sum(('x','y')).rename('BL_positive_area')
            BL_pos_frac = (BL_pos_area/mcs_area).rename('BL_positive_fraction')
            BL_tot_bp = BL_pos_nan.mean(('x','y')).rename('BL_TOT_bp')

            # mask-mean buoyancy and its two components
            BL_tot_mcs = BL_mcs.mean(('x','y')).rename('BL_TOT_mcs')
            BL_cape_mcs = tmp.Buoy_CAPE.where(in_mcs).mean(('x','y')).rename('BL_CAPE_mcs')
            BL_subsat_mcs = tmp.Buoy_SUBSAT.where(in_mcs).mean(('x','y')).rename('BL_SUBSAT_mcs')

            # zero area / fraction means "no positive buoyancy": store as NaN
            BL_pos_area = BL_pos_area.where(BL_pos_area > 0, np.nan)
            BL_pos_frac = BL_pos_frac.where(BL_pos_frac > 0, np.nan)

            # .load() pulls any still-lazy coordinate into memory before the file closes
            BL_features_list.append(xr.merge([BL_pos_area, BL_pos_frac, BL_tot_bp, BL_tot_mcs,
                                              BL_cape_mcs, BL_subsat_mcs]).load())

    BL_features_xr = xr.concat(BL_features_list, pd.Index(track_list, name='tracks'))

    return BL_features_xr

In [None]:
def data_tracks_phase(data_tracks, var_name):
    """
    Sample one track variable at the five MCS phase times of every track.

    Parameters
    ----------
    data_tracks : xarray.Dataset
        Track dataset with a ``tracks`` coordinate, the phase markers
        ``idt_mcs_init/grow/mature/decay/end`` and the variable ``var_name``
        defined along ``times``.
    var_name : str
        Name of the variable to sample (the notebook uses the area variables).

    Returns
    -------
    xarray.Dataset
        A single variable ``var_name`` with dims ``(tracks, mcs_phase)``.
    """
    phase_labels = ['Init', 'Grow', 'Mature', 'Decay', 'End']
    phase_markers = ('idt_mcs_init', 'idt_mcs_grow', 'idt_mcs_mature',
                     'idt_mcs_decay', 'idt_mcs_end')

    track_ids = []
    per_track = []

    for track_id in data_tracks.tracks.values:
        single = data_tracks.sel(tracks=track_id)

        # NOTE(review): the markers are used as labels of the 'times'
        # coordinate (.sel), not as positions — confirm this is intended.
        phase_times = [single[marker].values for marker in phase_markers]
        phase_values = single[var_name].sel(times=phase_times).values

        per_track.append(
            xr.Dataset(data_vars={'var_null': (['mcs_phase'], phase_values)},
                       coords={'mcs_phase': (['mcs_phase'], phase_labels)})
        )
        track_ids.append(track_id)

    stacked = xr.concat(per_track, pd.Index(track_ids, name='tracks'))

    # the placeholder name is swapped for the requested variable name at the end
    return stacked.rename_vars({'var_null':var_name})

In [None]:
def data_tracks_precip_features(files):
    """
    Compute per-track precipitation statistics and agreement between two products.

    For every track file the function derives, inside the MCS mask
    (``cloudtracknumber_nomergesplit > 0``):

    * ``precipitationCal_mcs`` / ``mtpr_mcs`` - mask-mean of each product per phase;
    * ``corr_coeff_space`` - per-phase Pearson correlation between the two
      2-D fields, with points outside the mask set to 0;
    * ``corr_coeff_temp``  - per-track Pearson correlation between the two
      mask-mean series.

    Parameters
    ----------
    files : iterable of pathlib.Path
        Track files; the track number is parsed from ``file.name[-8:-3]``.

    Returns
    -------
    xarray.Dataset
        The variables above, concatenated along ``tracks``.
    """

    track_list = []
    precip_features_list = []
    corr_coeff_temp = []

    for file in files:

        track_list.append(int(file.name[-8:-3])) # track number encoded in the file name

        # Close each file once its statistics are in memory (no handle leak).
        with xr.open_dataset(file) as tmp:
            in_mcs = tmp.cloudtracknumber_nomergesplit > 0

            # mask-mean precipitation (outside points are NaN and ignored by mean)
            prec_amp_mcs = tmp.precipitationCal.where(in_mcs).mean(('x','y')).rename('precipitationCal_mcs')
            mtpr_amp_mcs = tmp.mtpr.where(in_mcs).mean(('x','y')).rename('mtpr_mcs')

            # zero-filled fields for the spatial correlation (pearsonr cannot take NaN)
            prec_filled = tmp.precipitationCal.where(in_mcs, 0)
            mtpr_filled = tmp.mtpr.where(in_mcs, 0)

            # spatial correlation, one value per phase
            # NOTE(review): the original local names here (prec_era5/prec_gpm)
            # were swapped relative to the plotting cell's labels; the
            # correlation is symmetric, so the result is unaffected.
            corr_coeff = []
            for phase in tmp.mcs_phase:
                prec_map = prec_filled.sel(mcs_phase=phase)
                mtpr_map = mtpr_filled.sel(mcs_phase=phase)
                stats = pearsonr(prec_map.values.ravel(), mtpr_map.values.ravel())
                corr_coeff.append(stats[0]) # correlation coefficient only
            corr_coeff_space = xr.Dataset(data_vars=dict(corr_coeff_space=(['mcs_phase'],np.asarray(corr_coeff))),
                                     coords=dict(mcs_phase=(['mcs_phase'], tmp.mcs_phase.values)))

            # temporal correlation between the two mask-mean series
            corr_coeff_temp.append(pearsonr(prec_amp_mcs.values.ravel(), mtpr_amp_mcs.values.ravel())[0])

            # .load() pulls any still-lazy coordinate into memory before the file closes
            precip_features_list.append(xr.merge([prec_amp_mcs ,mtpr_amp_mcs,
                                                  corr_coeff_space]).load())

    corr_coeff_temp_xr = xr.Dataset(data_vars=dict(corr_coeff_temp=(['tracks'],np.asarray(corr_coeff_temp))),
                                 coords=dict(tracks=(['tracks'], track_list)))
    precip_features_xr = xr.concat(precip_features_list, pd.Index(track_list, name='tracks'))

    # attach the per-track temporal correlation to the per-phase features
    precip_features_xr = xr.merge([precip_features_xr, corr_coeff_temp_xr])

    return precip_features_xr

In [None]:
%%time

data_tracks_list = []

year_list = [2014,2015]
for year in year_list:

    print('processing year: {}'.format(year))
    # directory of the mcs_3dvars files
    dir_envs_track = Path('/neelin2020/mcs_flextrkr/mcs_stats/envs_track/{}/tropics'.format(year))
    files = sorted(list(dir_envs_track.glob('*.nc')))

    # load data_tracks 
    data_tracks = xr.open_dataset('/neelin2020/mcs_flextrkr/mcs_stats/mcs_tracks_non2mcs/mcs_tracks_non2mcs_{}.tropics30NS.nc'.format(year))

    # write out BL_features dataset based on mcs_envs output
    data_BL_features = data_tracks_BL_features(files)
    data_precip_features = data_tracks_precip_features(files)
    
    # area-related variables
    data_ccs_area = data_tracks_phase(data_tracks, var_name='ccs_area')
    data_core_area = data_tracks_phase(data_tracks, var_name='core_area')
    data_cold_area = data_tracks_phase(data_tracks, var_name='cold_area')
    data_area_features = xr.merge([data_ccs_area, data_core_area, data_cold_area])
    
    corr_temp = data_precip_features.corr_coeff_temp
    corr_coeff_space = data_precip_features.corr_coeff_space.mean('mcs_phase')
    
    # filtering out tracks with low coherency in precip evolution
    cond_1 = corr_temp > 0.7
    cond_2 = corr_coeff_space > 0

    data_tracks_list.append(xr.merge([data_tracks['mcs_duration'].isel(tracks=np.where(cond_1 & cond_2)[0]),
                                data_BL_features.isel(tracks=np.where(cond_1 & cond_2)[0]),
                                data_precip_features.isel(tracks=np.where(cond_1 & cond_2)[0]),
                                data_area_features.isel(tracks=np.where(cond_1 & cond_2)[0])]
                                ))

In [None]:
# Concatenate the per-year datasets into one dataset along a new 'samples'
# dimension. Track numbers restart every year, so each track is stripped of its
# 'tracks' coordinate and renumbered 1..N across all years.

data_tracks_sum = []

for data_tracks_singleyr in data_tracks_list:
    for i in range(len(data_tracks_singleyr.tracks)):
        # drop_vars replaces the deprecated Dataset.drop for removing a coordinate
        tmp = data_tracks_singleyr.isel(tracks=i).drop_vars('tracks')
        data_tracks_sum.append(tmp)

number_multiyr = len(data_tracks_sum)

data_tracks_multiyr = xr.concat(data_tracks_sum, pd.Index(np.arange(1,number_multiyr+1), name='samples'))

In [None]:
# Positive-buoyancy area (black, left axis) and mean positive-buoyancy amplitude
# (blue, right axis) through the MCS life cycle, one panel per duration class.
fig, ((ax1,ax2,ax3),(ax4,ax5,ax6)) = plt.subplots(2,3,figsize=(12,6))
# NOTE: only five duration classes are drawn, so the sixth panel (ax6) stays empty.

# NOTE(review): 625 presumably converts a grid-cell count to km^2 and 340*3/9.8
# presumably converts buoyancy from m/s^2 to K (the axis labels use those
# units) — confirm both factors.
area_per_cell = 625
buoy_to_K = 340*3/9.8

mcs_duration_all = data_tracks_multiyr.mcs_duration

for (ax, duration_min, duration_max) in zip([ax1,ax2,ax3,ax4,ax5],[0,6,12,18,24],[6,12,18,24,48]):

    # tracks whose duration falls in [duration_min, duration_max)
    idx = np.where(np.logical_and(mcs_duration_all >= duration_min, mcs_duration_all < duration_max))[0]
    data_sub = data_tracks_multiyr.isel(samples=idx)

    ax.plot(data_sub.mcs_phase, area_per_cell*data_sub.BL_positive_area.mean('samples'), '-ok')

    # Use a dedicated name for the twin axis: the original rebound `ax2` here,
    # shadowing the subplot axis unpacked above.
    ax_amp = ax.twinx()
    # error bars: standard error of the mean over the samples of this class
    ax_amp.errorbar(data_sub.mcs_phase, buoy_to_K*data_sub.BL_TOT_bp.mean('samples'),
             yerr=buoy_to_K*data_sub.BL_TOT_bp.std('samples')/np.sqrt(len(data_sub.samples)), color='b')

    ax.set_ylabel('Area of Buoy$^{+}$ (km$^2$)', color='k', fontsize=10)
    ax_amp.set_ylabel('Mean amplitude of Buoy$^{+}$ (K)', color='b', fontsize=10)
    ax.set_title('non2MCS tracks, \n Samples: {}'.format(len(data_sub.samples)), fontsize=10.5)

    ax.set_ylim([3500,26000])
    ax_amp.set_ylim([0.18, 0.4])

plt.tight_layout()

In [None]:
# Left: MCS-mean rain rate of the two precipitation products per life-cycle phase.
# Right: square root of the cold-cloud-shield area per phase.
# One line per MCS duration class; error bars are standard errors of the mean.
fig, (ax, ax2) = plt.subplots(1,2,figsize=(8,2.5))

# A single pass serves both panels (the original repeated the same subset
# selection in two loops and carried an unused sixth, empty colour entry).
for n, (duration_min, duration_max, ccolor) in enumerate(zip([0,6,12,18,24],[6,12,18,24,48],
                                                ['k','y','orange','r','m'])):

    # tracks whose duration falls in [duration_min, duration_max)
    tmp = data_tracks_multiyr.mcs_duration
    idx = np.where(np.logical_and(tmp >= duration_min, tmp < duration_max))[0]
    data_sub = data_tracks_multiyr.isel(samples=idx)

    # GPM-IMERG (circles)
    ax.errorbar(data_sub.mcs_phase, data_sub.precipitationCal_mcs.mean('samples'), linestyle='--', marker='o', color=ccolor,
               yerr=data_sub.precipitationCal_mcs.std('samples')/np.sqrt(len(idx)),
               label='{}-{}H ({})'.format(duration_min, duration_max, len(idx)))
    # ERA-5 (crosses, same colour, no legend entry)
    ax.errorbar(data_sub.mcs_phase, data_sub.mtpr_mcs.mean('samples'), linestyle='--', marker='x', color=ccolor,
               yerr=data_sub.mtpr_mcs.std('samples')/np.sqrt(len(idx)))

    # cold cloud shield size
    ax2.errorbar(data_sub.mcs_phase, np.sqrt(data_sub.ccs_area).mean('samples'), linestyle='--', marker='o', color=ccolor,
               yerr=np.sqrt(data_sub.ccs_area).std('samples')/np.sqrt(len(idx)),
               label='{}-{}H ({})'.format(duration_min, duration_max, len(idx)))

ax.set_ylabel('Rain rate, MCS (mm/hr)', fontsize=10)
ax.set_ylim([0.5, 5.5])
ax2.set_ylabel('sqrt( Area ) (km)', fontsize=10)
ax2.set_ylim([200, 600])

ax.set_title('MCS area-average precip: GPM-IMERG', fontsize=10)
ax2.set_title('Cold cloud shield (CCS) Area', fontsize=10)

ax.legend(fontsize=8)
ax.tick_params(labelsize=9)
ax2.tick_params(labelsize=9)

plt.tight_layout()

In [None]:
# Panel 1: core-cloud fraction (triangles) and positive-buoyancy fraction
#          (circles, twin axis sharing the same range) per life-cycle phase.
# Panel 2: mask-mean buoyancy.  Panel 3: mean of the positive buoyancy only.
# One line per MCS duration class; error bars are standard errors of the mean.
fig, (ax,ax2,ax3) = plt.subplots(1,3,figsize=(12,2.5))
axy = ax.twinx()

# NOTE(review): 340*3/9.8 presumably converts buoyancy from m/s^2 to K — confirm.
buoy_to_K = 340*3/9.8

for n, (duration_min, duration_max, ccolor) in enumerate(zip([0,6,12,18,24],[6,12,18,24,48],
                                                ['k','y','orange','r','m'])):
    # tracks whose duration falls in [duration_min, duration_max)
    tmp = data_tracks_multiyr.mcs_duration
    idx = np.where(np.logical_and(tmp >= duration_min, tmp < duration_max))[0]
    data_sub = data_tracks_multiyr.isel(samples=idx)
    n_sqrt = np.sqrt(len(idx))

    # The error bars use the spread of the plotted quantity itself. The original
    # took the std of sqrt(quantity) (copied from the sqrt-area plot), which
    # mis-scales the bars and turns negative buoyancy values into NaN.

    # core cloud fraction & positive buoyancy fraction
    tmp = (data_sub.core_area/data_sub.ccs_area)
    ax.errorbar(data_sub.mcs_phase, tmp.mean('samples'), marker='^', linestyle='--', color=ccolor,
               yerr=tmp.std('samples')/n_sqrt,
               label='{}-{}H ({})'.format(duration_min, duration_max, len(idx)))

    tmp = (data_sub.BL_positive_fraction)
    axy.errorbar(data_sub.mcs_phase, tmp.mean('samples'), marker='o', markersize=4, linestyle='--', color=ccolor,
               yerr=tmp.std('samples')/n_sqrt,
               label='{}-{}H ({})'.format(duration_min, duration_max, len(idx)))

    # buoyancy mean
    tmp = buoy_to_K*data_sub.BL_TOT_mcs
    ax2.errorbar(data_sub.mcs_phase, tmp.mean('samples'), marker='^', linestyle='--', color=ccolor,
               yerr=tmp.std('samples')/n_sqrt)

    # positive buoyancy mean
    tmp = buoy_to_K*data_sub.BL_TOT_bp
    ax3.errorbar(data_sub.mcs_phase, tmp.mean('samples'), marker='o', markersize=4, linestyle='--', color=ccolor,
               yerr=tmp.std('samples')/n_sqrt)

# both fraction axes share one range; the twin's ticks are hidden
ax.set_ylim([0, 0.65])
axy.set_ylim([0, 0.65])
axy.set_yticks([])
ax.set_ylabel('Fraction of area',fontsize=10)
ax2.set_ylabel('Mean B$_{L}$, MCS (K)',fontsize=10)
ax3.set_ylabel('Mean B$_{L}$, Pos (K)',fontsize=10)
ax3.set_ylim([0,0.5])

# The original also called ax4.tick_params here; ax4 is not part of this
# figure (it leaked from an earlier cell), so that call is dropped.
ax.tick_params(labelsize=9)
ax2.tick_params(labelsize=9)
ax3.tick_params(labelsize=9)

ax.legend(fontsize=8, loc='center');

plt.tight_layout()

In [None]:
# Keep only the samples whose mcs_duration is strictly greater than 5
# (positional indices into the 'samples' dimension).
idx_mcslim = np.where(data_tracks_multiyr.mcs_duration > 5)[0]
data_tracks_multiyr_rm = data_tracks_multiyr.isel(samples=idx_mcslim)

In [None]:
# Scatter of the SUBSAT (x) against the CAPE (y) buoyancy component for the
# first three phases, coloured by MCS duration, using the duration-filtered samples.
fig,ax = plt.subplots(1,3,figsize=(15,3))

# first three phase labels of the mcs_phase coordinate
phase_sel = data_tracks_multiyr.mcs_phase[:3]

for n,phase in enumerate(phase_sel.values):
    # NOTE: BL_tot_init and ccs_area are extracted but not used in the plot below.
    BL_tot_init = data_tracks_multiyr_rm.BL_TOT_mcs.sel(mcs_phase=phase)
    BL_cape_init = data_tracks_multiyr_rm.BL_CAPE_mcs.sel(mcs_phase=phase)
    BL_subsat_init = data_tracks_multiyr_rm.BL_SUBSAT_mcs.sel(mcs_phase=phase)
    ccs_area = data_tracks_multiyr_rm.ccs_area.sel(mcs_phase=phase)
    mcs_duration = data_tracks_multiyr_rm.mcs_duration
    # NOTE(review): 340*3/9.8 presumably converts buoyancy from m/s^2 to K — confirm.
    ax[n].scatter(340*3/9.8*BL_subsat_init, 340*3/9.8*BL_cape_init, c=mcs_duration
                , cmap='jet', vmin=5, vmax=48)

    ax[n].set_ylim([-5,5]);
    ax[n].set_xlim([0,8])
    ax[n].set_title('MCS phase: {}'.format(phase))

#ax.set_ylim([35000, 150000])

In [None]:
# pdf of mcs duration at different BL,int
BL_init = data_tracks_multiyr.BL_TOT_mcs.sel(mcs_phase='Init')
BL_init_sort = BL_init.sortby(BL_init)

In [None]:
# Histogram of the scaled Init-phase buoyancy in 0.2-wide bins from -8 to 2.
# NOTE(review): 340*3/9.8 presumably converts buoyancy from m/s^2 to K — confirm.
plt.hist(340*3/9.8*BL_init_sort, bins=np.arange(-8,2,0.2).tolist(), rwidth=0.5)

In [None]:
# scatter plot BL, mean; BL, pos-mean; mcs duration
# science question: which matters more for longer durations?

fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(12,3.5))

bp_init = 340*3/9.8*data_tracks_multiyr.BL_TOT_bp.sel(mcs_phase='Init')
bp_area = 625*data_tracks_multiyr.BL_positive_area.sel(mcs_phase='Init')
bmean_init = 340*3/9.8*data_tracks_multiyr.BL_TOT_mcs.sel(mcs_phase='Init')
cp1 = ax1.scatter(bp_init, bp_area, c=data_tracks_multiyr.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp1, ax=ax1, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax1.set_xlabel('Mean amplitude of Buoy.$^{+}$ (K)')
ax1.set_ylabel('Area of Buoy.$^{+}$ (km$^2$)')
ax1.set_title('MCS phase: Initial',fontsize=10.5)

bp_init = 340*3/9.8*data_tracks_multiyr.BL_TOT_bp.sel(mcs_phase='Grow')
bp_frac = 625*data_tracks_multiyr.BL_positive_area.sel(mcs_phase='Grow')
bmean_init = 340*3/9.8*data_tracks_multiyr.BL_TOT_mcs.sel(mcs_phase='Grow')
cp2 = ax2.scatter(bp_init, bp_frac, c=data_tracks_multiyr.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp2, ax=ax2, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax2.set_xlabel('Mean amplitude of Buoy.$^{+}$ (K)')
ax2.set_ylabel('Area of Buoy.$^{+}$ (km$^2$)')
ax2.set_title('MCS phase: Growing',fontsize=10.5)

bp_init = 340*3/9.8*data_tracks_multiyr.BL_TOT_bp.sel(mcs_phase='Mature')
bp_frac = 625*data_tracks_multiyr.BL_positive_area.sel(mcs_phase='Mature')
bmean_init = 340*3/9.8*data_tracks_multiyr.BL_TOT_mcs.sel(mcs_phase='Mature')
cp3 = ax3.scatter(bp_init, bp_frac, c=data_tracks_multiyr.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp3, ax=ax3, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax3.set_xlabel('Mean amplitude of Buoy.$^{+}$ (K)')
ax3.set_ylabel('Area of Buoy.$^{+}$ (km$^2$)')
ax3.set_title('MCS phase: Mature',fontsize=10.5)

ax1.set_xlim([-0.01,1.01])
ax2.set_xlim([-0.01,1.01])
ax3.set_xlim([-0.01,1.01])

ax1.set_ylim([0, 140000])
ax2.set_ylim([0, 140000])
ax3.set_ylim([0, 140000])

plt.tight_layout()

In [None]:
# scatter plot BL, mean; BL, pos-mean; mcs duration
# science question: which matters more for longer durations?

fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(12,3.5))

bp_init = 340*3/9.8*data_tracks_HCC.BL_TOT_bp.sel(mcs_phase='Init')
bp_frac = data_tracks_HCC.BL_positive_fraction.sel(mcs_phase='Init')
bmean_init = 340*3/9.8*data_tracks_HCC.BL_TOT_mcs.sel(mcs_phase='Init')
mcs_duration = data_tracks
cp1 = ax1.scatter(bp_init, bp_frac, c=data_tracks_HCC.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp1, ax=ax1, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax1.set_xlabel('Mean amplitude of Buoy.$^{+}$ (K)')
ax1.set_ylabel('Fraction of Buoy.$^{+}$')
ax1.set_title('MCS phase: Initial',fontsize=10.5)

bp_init = 340*3/9.8*data_tracks_HCC.BL_TOT_bp.sel(mcs_phase='Grow')
bp_frac = data_tracks_HCC.BL_positive_fraction.sel(mcs_phase='Grow')
bmean_init = 340*3/9.8*data_tracks_HCC.BL_TOT_mcs.sel(mcs_phase='Grow')
cp2 = ax2.scatter(bp_init, bp_frac, c=data_tracks_HCC.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp2, ax=ax2, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax2.set_xlabel('Mean amplitude of Buoy.$^{+}$ (K)')
ax2.set_ylabel('Fraction of Buoy.$^{+}$')
ax2.set_title('MCS phase: Growing',fontsize=10.5)

bp_init = 340*3/9.8*data_tracks_HCC.BL_TOT_bp.sel(mcs_phase='Mature')
bp_frac = data_tracks_HCC.BL_positive_fraction.sel(mcs_phase='Mature')
bmean_init = 340*3/9.8*data_tracks_HCC.BL_TOT_mcs.sel(mcs_phase='Mature')
cp3 = ax3.scatter(bp_init, bp_frac, c=data_tracks_HCC.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp3, ax=ax3, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax3.set_xlabel('Mean amplitude of Buoy.$^{+}$ (K)')
ax3.set_ylabel('Fraction of Buoy.$^{+}$')
ax3.set_title('MCS phase: Mature',fontsize=10.5)

ax1.set_xlim([-0.01,1.01])
ax2.set_xlim([-0.01,1.01])
ax3.set_xlim([-0.01,1.01])

ax1.set_ylim([0, 0.6])
ax2.set_ylim([0, 0.6])
ax3.set_ylim([0, 0.6])

plt.tight_layout()

In [None]:
# scatter plot BL, mean; BL, pos-mean; mcs duration
# science question: which matters more for longer durations?

fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(12,3.5))

bp_init = 340*3/9.8*data_BL_features.BL_TOT_bp.sel(mcs_phase='Init')
bp_frac = data_BL_features.BL_positive_fraction.sel(mcs_phase='Init')
bp_area = 625*data_BL_features.BL_positive_area.sel(mcs_phase='Init')
bmean_init = 340*3/9.8*data_BL_features.BL_TOT_mcs.sel(mcs_phase='Init')
mcs_duration = data_tracks
cp1 = ax1.scatter(bp_area, bp_frac, c=data_tracks.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp1, ax=ax1, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax1.set_xlabel('Area of Buoy.$^{+}$ (km$^2$)')
ax1.set_ylabel('Fraction of Buoy.$^{+}$')
ax1.set_title('MCS phase: Initial',fontsize=10.5)

bp_init = 340*3/9.8*data_BL_features.BL_TOT_bp.sel(mcs_phase='Grow')
bp_frac = data_BL_features.BL_positive_fraction.sel(mcs_phase='Grow')
bp_area = 625*data_BL_features.BL_positive_area.sel(mcs_phase='Grow')
bmean_init = 340*3/9.8*data_BL_features.BL_TOT_mcs.sel(mcs_phase='Grow')
cp2 = ax2.scatter(bp_area, bp_frac, c=data_tracks.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp2, ax=ax2, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax2.set_xlabel('Area of Buoy.$^{+}$ (km$^2$)')
ax2.set_ylabel('Fraction of Buoy.$^{+}$')
ax2.set_title('MCS phase: Growing',fontsize=10.5)

bp_init = 340*3/9.8*data_BL_features.BL_TOT_bp.sel(mcs_phase='Mature')
bp_frac = data_BL_features.BL_positive_fraction.sel(mcs_phase='Mature')
bp_area = 625*data_BL_features.BL_positive_area.sel(mcs_phase='Mature')
bmean_init = 340*3/9.8*data_BL_features.BL_TOT_mcs.sel(mcs_phase='Mature')
cp3 = ax3.scatter(bp_area, bp_frac, c=data_tracks.mcs_duration, s=10, vmin=0, vmax=40, cmap='jet')
cbar = plt.colorbar(cp3, ax=ax3, shrink=0.6)
cbar.set_label('MCS duration (hr)',fontsize=10)
ax3.set_xlabel('Area of Buoy.$^{+}$ (km$^2$)')
ax3.set_ylabel('Fraction of Buoy.$^{+}$')
ax3.set_title('MCS phase: Mature',fontsize=10.5)

ax1.set_xlim([0, 140000])
ax2.set_xlim([0, 140000])
ax3.set_xlim([0, 140000])

ax1.set_ylim([0, 0.6])
ax2.set_ylim([0, 0.6])
ax3.set_ylim([0, 0.6])

plt.tight_layout()

In [None]:
# Split the tracks into four equally sized groups, ordered by ascending
# Init-phase buoyancy: first by the mask-mean value, then by the
# positive-only mean.

# --- grouping by BL_TOT_mcs at Init ---
data_BL_init = data_BL_features.BL_TOT_mcs.sel(mcs_phase = 'Init')
data_BL_init_sort = data_BL_init.sortby(data_BL_init)
n_sorted = len(data_BL_init_sort)
tracks_list = [data_BL_init_sort.tracks[q*n_sorted//4:(q+1)*n_sorted//4].values
               for q in range(4)]

# --- grouping by BL_TOT_bp at Init ---
# NOTE(review): tracks without positive buoyancy are NaN here; presumably they
# sort to the end and land in the last group — confirm this is acceptable.
data_BL_init = data_BL_features.BL_TOT_bp.sel(mcs_phase = 'Init')
data_BL_init_sort = data_BL_init.sortby(data_BL_init)
n_sorted = len(data_BL_init_sort)
tracks_list_bp = [data_BL_init_sort.tracks[q*n_sorted//4:(q+1)*n_sorted//4].values
                  for q in range(4)]

In [None]:
# plot precip binned by different init. BL
fig, (ax1,ax2) = plt.subplots(1,2,figsize=(10,3.5))

for n, (tracks, q_label) in enumerate(zip(tracks_list, ['q4','q3','q2','q1'])):
    
    BL_init_mean = np.round(data_BL_features.BL_TOT_mcs.sel(tracks=tracks).mean().values*340/9.81*3,decimals=2)
    
    ax1.plot(data_precip_features.mcs_phase,
             data_precip_features.sel(tracks=tracks).precipitationCal_mcs.mean('tracks'), '-o',
             label='{} ({} K)'.format(q_label, BL_init_mean))
ax1.legend()
ax1.set_ylabel('MCS precipitation (mm/hr)', fontsize=10.5)
ax1.set_title('Grouped by B$_{L,tot}$ at Init.', fontsize=10.5)

for n, (tracks, q_label) in enumerate(zip(tracks_list_bp, ['q4','q3','q2','q1'])):
    
    BL_init_mean = np.round(data_BL_features.BL_TOT_bp.sel(tracks=tracks).mean().values*340/9.81*3,decimals=2)
    
    ax2.plot(data_precip_features.mcs_phase,
             data_precip_features.sel(tracks=tracks).precipitationCal_mcs.mean('tracks'), '-o',
             label='{} ({} K)'.format(q_label, BL_init_mean))
ax2.legend()
ax2.set_ylabel('MCS precipitation (mm/hr)', fontsize=10.5)
ax2.set_title('Grouped by B$_{L,tot}$ (positive) at Init.', fontsize=10.5)

In [None]:
# higher buoyancy at init. longer duration?
fig = plt.figure(figsize=(5,3))

duration_list = []
for tracks in tracks_list:
    duration_list.append(data_tracks.sel(tracks=tracks).mcs_duration.values)
    
plt.boxplot(duration_list, labels=['q4','q3','q2','q1'], widths=0.3, showfliers=False, showmeans=True);
plt.ylim([0,40])
plt.ylabel('MCS duration', fontsize=10.5)
plt.xlabel('Mean B$_{L,tot}$ at MCS-init.', fontsize=10.5)
plt.grid(linestyle=':', linewidth=0.5)

In [None]:
%%time
# Accumulate the fraction-amplitude histograms over several years.
ds_bins_list = []
# multiple-year stats
for year in [2015,2016,2017,2018,2019]:
    
    # directory of the mcs_3dvars files
    dir_envs_track = Path('/neelin2020/mcs_flextrkr/mcs_stats/envs_track/{}/tropics'.format(year))
    files = sorted(list(dir_envs_track.glob('*.nc')))
    # NOTE: track_list is filled here but not used afterwards in this cell.
    track_list = [] # available mcs_envs
    for file in files:
        track_list.append(int(file.name[-8:-3]))

    # track data
    # NOTE: BL_fract_amp_condmn reads this notebook-level `data_tracks` when it
    # looks up mcs_duration, so it must be rebound before each call.
    data_tracks = xr.open_dataset('/neelin2020/mcs_flextrkr/mcs_stats/mcs_tracks_non2mcs/mcs_tracks_non2mcs_{}.tropics30NS.nc'.format(year))

    # calculate conditional mean
    ds_bins_list.append(BL_fract_amp_condmn(files))
    
# Stack the yearly histograms along a temporary dimension and sum over it.
# NOTE: from here on `ds_bins_list` is a single Dataset, not a list.
ds_bins_list = xr.concat(ds_bins_list, dim='new_dims').sum('new_dims')

In [None]:
# Relative occurrence of (positive-buoyancy fraction, amplitude) pairs,
# one panel per MCS phase, with contours of the same quantity on top.
fig, (ax1,ax2,ax3,ax4,ax5) = plt.subplots(1,5,figsize=(15,3))

phase_panels = zip(['Init','Grow','Mature','Decay','End'],[ax1,ax2,ax3,ax4,ax5])

for n, (mcs_phase,ax) in enumerate(phase_panels):

    phase_counts = ds_bins_list.samples[n,:,:]

    # occurrence: counts normalised by the phase total; empty bins become NaN
    tmp = (phase_counts/np.sum(phase_counts)).values
    tmp = np.where(tmp == 0, np.nan, tmp)
    cp = ax.pcolormesh(ds_bins_list.bins_fract, ds_bins_list.bins_amp, tmp.T,
                       vmin=0, vmax=0.04, cmap=mixedmap)
    cbar = plt.colorbar(cp, ax=ax, shrink=0.6, pad=0.02)
    cbar.set_label('occurrence')

    # the contour overlay uses the same normalised counts
    tmp2 = (phase_counts/np.sum(phase_counts)).values
    tmp2 = np.where(tmp2 == 0, np.nan, tmp2)
    ax.contour(ds_bins_list.bins_fract, ds_bins_list.bins_amp, tmp2.T,
              levels=[0.005, 0.01, 0.015, 0.02], colors=['k'], linewidths=1)

    ax.set_xlim([0,0.25])
    ax.set_ylim([0,0.006])

    ax.set_title(mcs_phase)
    ax.set_xlabel('BL_FRAC',fontsize=12)
    # y label on the left-most panel only
    if n == 0:
        ax.set_ylabel('BL_AMP (m/s$^2$)',fontsize=12)

plt.tight_layout()

In [None]:
# Display the summed multi-year histogram dataset as the cell output.
ds_bins_list

In [None]:
# Mean MCS duration per (positive-buoyancy fraction, amplitude) bin, one panel
# per MCS phase, with contours of relative occurrence on top.
fig, (ax1,ax2,ax3,ax4,ax5) = plt.subplots(1,5,figsize=(15,3))

phase_panels = zip(['Init','Grow','Mature','Decay','End'],[ax1,ax2,ax3,ax4,ax5])

# amplitude bin edges scaled for the y axis
# NOTE(review): 340*3/9.8 presumably converts m/s^2 to K (the axis label says K) — confirm.
amp_axis = 340*3/9.8*ds_bins_list.bins_amp

for n, (mcs_phase,ax) in enumerate(phase_panels):

    phase_counts = ds_bins_list.samples[n,:,:]

    # mean duration = summed duration / number of samples; zeros become NaN
    tmp = (ds_bins_list.mcs_duration_sum[n,:,:]/phase_counts).values
    tmp = np.where(tmp == 0, np.nan, tmp)
    cp = ax.pcolormesh(ds_bins_list.bins_fract, amp_axis, tmp.T,
                       vmin=10, vmax=50, cmap=mixedmap)
    cbar = plt.colorbar(cp, ax=ax, shrink=0.6, pad=0.02)
    cbar.set_label('MCS duration (hr)')

    # occurrence contours: counts normalised by the phase total
    tmp2 = (phase_counts/np.sum(phase_counts)).values
    tmp2 = np.where(tmp2 == 0, np.nan, tmp2)
    ax.contour(ds_bins_list.bins_fract, amp_axis, tmp2.T,
              levels=[0.005, 0.01, 0.015, 0.02], colors=['k'], linewidths=1)

    ax.set_xlim([0,0.25])
    #ax.set_ylim([0,0.006])

    ax.set_title(mcs_phase)
    ax.set_xlabel('BL_FRAC',fontsize=12)
    # y label on the left-most panel only
    if n == 0:
        ax.set_ylabel('BL_AMP (K)',fontsize=12)

plt.tight_layout()