#### write out filtered MCS tracks (notebook version)
- showing complete MCS life cycles: genesis, growth, maturity, decay, end
- adding the ERA-5 rain rate for further screening based on rainfall coherence

In [1]:
import os
import sys
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from datetime import datetime
from pathlib import Path
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib.patches import Rectangle

import cartopy.crs as ccrs
import cartopy.feature as cfeat
from cartopy.util import add_cyclic_point
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

# for t-test precip. mean
from scipy.stats import ttest_ind

import warnings

In [2]:
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below -- consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

In [3]:
# data directories
# MCS track statistics files (one file per year, opened in the filtering cell)
dir_mcs_track = Path('/neelin2020/mcs_flextrkr/mcs_stats/')
# ERA-5 NetCDF files, organised in per-year sub-directories
dir_era5 = Path('/neelin2020/ERA-5/NC_FILES/')
# NOTE(review): dir_buoy is not referenced in the visible cells -- presumably
# used elsewhere; confirm before removing.
dir_buoy = Path('/neelin2020/ERA-5_buoy/layer_thetae/')

In [4]:
# Select tropical tracks that are not flagged as MCS during their first time
# steps ("non2mcs") and keep only those showing a complete MCS life cycle.
#
# Products left in the notebook namespace:
#   data_non2mcs          - tracks starting in the region with mcs_status summing
#                           to zero over times 0-4
#   data_non2mcs_phase    - per-track time indices of the init / grow / mature /
#                           decay / end phases
#   data_non2mcs_complete - data_non2mcs restricted to the tracks kept in
#                           data_non2mcs_phase
#
# NOTE: these products are overwritten on every pass of the year loop, so only
# the last year survives if the range is widened.

for year in range(2014,2015):

    data_track = xr.open_dataset(dir_mcs_track / 'mcs_tracks_final_extc_{}0101.0000_{}0101.0000.nc'.format(year,year+1))

    # convection over tropics [175W-175E, 30S-30N], 5-deg to escape periodicity
    meanlat = data_track.meanlat.sel(times=0)
    idx_lat = meanlat.where((meanlat > -30) & (meanlat < 30)).dropna(dim='tracks').tracks
    meanlon = data_track.meanlon.sel(times=0)
    idx_lon = meanlon.where((meanlon > -175) & (meanlon < 175)).dropna(dim='tracks').tracks
    idx_reg = np.intersect1d(idx_lat, idx_lon) # tracks starting in the selected region

    data_sub = data_track.sel(tracks=idx_reg)

    # label-based slices are inclusive: times 0..4 and times 5..400
    nonmcs_hours = data_sub.mcs_status.sel(times=slice(0,4)).sum(dim='times')
    # kept for interactive inspection; not used further in this cell
    mcs_hours = data_sub.mcs_status.sel(times=slice(5,400)).sum(dim='times')
    # tracks whose mcs_status sums to zero over times 0..4
    idx = np.where(nonmcs_hours == 0)[0]
    data_non2mcs = data_sub.isel(tracks=idx)

    ## generate time indices for tracks showing complete MCS lifetimes
    track_list = []

    for track in data_non2mcs.tracks.values:

        # select the track once instead of once per variable
        track_ds = data_non2mcs.sel(tracks=track)
        tmp = track_ds.mcs_status

        # mature status = max area
        # (alternative definition, not used: max area-averaged precip rate,
        #  i.e. total_rain / (area / 100))
        tmp2 = track_ds.area

        idt_mcs = np.where(tmp == 1)[0]
        idt_area_max = np.where(tmp2 == tmp2.max('times'))[0]
        if idt_mcs.size == 0 or idt_area_max.size == 0:
            # no MCS time step (or no valid area): cannot be a complete life cycle
            continue

        idt_mcs_init = idt_mcs[0]
        idt_mcs_mature = idt_area_max[0]
        idt_mcs_end = idt_mcs[-1]

        if idt_mcs_end + 1 >= tmp.sizes['times']:
            # the MCS lasts until the final time slot, so its termination is
            # not observed; the next-step lookup below would be out of range
            continue

        mcs_duration = track_ds.mcs_duration.values

        cond1 = ((idt_mcs_end - idt_mcs_init + 1) == mcs_duration) # MCS phase is contiguous
        cond2 = (idt_mcs_end > idt_mcs_mature)
        cond3 = (idt_mcs_init < idt_mcs_mature)
        # mcs status becomes 0 at the next timestep; the idt_* values are
        # positional indices, hence isel rather than a label lookup
        cond4 = bool(tmp.isel(times=idt_mcs_end+1) == 0)

        if not (cond1 and cond2 and cond3 and cond4):
            continue

        # grow / decay = midpoints between init-mature and mature-end
        idt_mcs_grow = idt_mcs_init + (idt_mcs_mature - idt_mcs_init)//2
        idt_mcs_decay = idt_mcs_mature + (idt_mcs_end - idt_mcs_mature)//2

        ds = xr.Dataset(data_vars=dict(
                   idt_mcs_init=(['tracks'], [idt_mcs_init]),
                   idt_mcs_grow=(['tracks'], [idt_mcs_grow]),
                   idt_mcs_mature=(['tracks'], [idt_mcs_mature]),
                   idt_mcs_decay=(['tracks'], [idt_mcs_decay]),
                   idt_mcs_end=(['tracks'], [idt_mcs_end])
                   ),
                   coords=dict(tracks=(['tracks'],[track])))

        track_list.append(ds)

    if not track_list:
        # xr.concat cannot concatenate an empty list; fail with a clear message
        raise ValueError('no track with a complete MCS life cycle found for {}'.format(year))

    data_non2mcs_phase = xr.concat(track_list, dim='tracks')
    data_non2mcs_complete = data_non2mcs.sel(tracks=data_non2mcs_phase.tracks)

In [6]:
data_non2mcs_complete

In [18]:
idt_mcs_mature

4

In [6]:
data_non2mcs_complete

In [None]:
%%time

# For each selected track, compute the ERA-5 mean total rain rate averaged over
# the track's cloud mask at every valid time step, then stack the per-track
# series along a 'tracks' dimension (ds_mpr_tracks_xr).

ds_tracks_list = []

# only the first five tracks are processed here; the same subset labels the
# concatenated result at the end of the cell
tracks_subset = data_non2mcs_complete.tracks[:5]

for track_number in tracks_subset:

    track_data = data_non2mcs_complete.sel(tracks=track_number)
    track_duration = track_data.track_duration.values
    base_time = track_data.base_time.values
    meanlat = track_data.meanlat.values
    meanlon = track_data.meanlon.values

    n_times = len(base_time) # number of time slots of a track
    mpr_area_mn = np.full(n_times, np.nan)
    saved_list = 0 # number of time steps actually processed

    for n, (timestamp, mnlat, mnlon) in enumerate(zip(base_time, meanlat, meanlon)):

        timestamp_str = str(timestamp)

        if timestamp_str == 'NaT': # excluding nan
            continue

        # kept as zero-padded strings: they are inserted into file name patterns
        year = timestamp_str[:4]
        month = timestamp_str[5:7]
        day = timestamp_str[8:10]
        hour = timestamp_str[11:13]

        # read the file containing the binary mask
        dir_mask = Path('/neelin2020/mcs_flextrkr/{}0101.0000_{}0101.0000/'.format(year,int(year)+1))
        file = list(dir_mask.glob('*{}{}{}_{}*.nc'.format(year,month,day,hour)))[0] # find the corresponding file at the given time
        data_mcsmask = xr.open_dataset(file)
        # convert the mask longitudes to the 0-360 convention: negative values
        # are shifted by +360, then the grid is sorted so lon is increasing
        lon_reset = data_mcsmask.lon
        lon_reset = lon_reset.where(lon_reset >= 0, 360+lon_reset)
        data_mcsmask.coords['lon'] = lon_reset
        data_mcsmask= data_mcsmask.sortby('lon')

        # read the file containing era-5 mean total rain rate
        dir_mpr = dir_era5 / '{}'.format(year)
        file = list(dir_mpr.glob('era-5.mpr.{}.{}.nc'.format(year, month)))[0]
        data_mpr = xr.open_dataset(file)
        # pick the single time step first so the latitude reordering below
        # only touches one 2-D field instead of the whole monthly file
        data_mpr = data_mpr.mtpr.sel(time=datetime(int(year),int(month),int(day),int(hour)), method='nearest').drop('time')
        # match the latitude range of two datasets
        data_mpr = data_mpr.reindex(latitude=list(reversed(data_mpr.latitude))) # reverse the order of latitude
        data_mpr = data_mpr.sel(latitude=slice(-60,60))
        # interpolating into mcs grids
        data_mpr = data_mpr.interp(longitude=data_mcsmask.lon, latitude=data_mcsmask.lat)

        # The mask grid now uses 0-360 longitudes, so the track centroid must be
        # expressed in the same convention; a negative longitude would otherwise
        # snap to the grid point nearest lon=0 and pick the wrong cloud number.
        mnlon_360 = mnlon % 360
        mcstracknumber = data_mcsmask.cloudtracknumber.sel(lat=mnlat, lon=mnlon_360, method='nearest').values
        # calculate the area mean of ERA-5 total rain rate
        mcs_mask = data_mcsmask.cloudtracknumber.drop('time')
        # averaging values within the coarse-grained mask; the factor 3600 is
        # the original author's conversion to mm/hr
        mpr_area_mn[n] = 3600*data_mpr.where(mcs_mask == mcstracknumber).mean().values

        saved_list += 1

    # sanity check: one value per valid time step of the track
    if saved_list == int(track_duration):
        print('nubmer of values matching track_duration... OK!')
    else:
        raise ValueError('the number of values does not match the track duration...check')

    # create xarray
    ds_single_track = xr.Dataset(data_vars=dict(mean_total_rain_era5 = (['times'], mpr_area_mn)),
                               coords=dict(times = (['times'], np.arange(n_times)))
                              )

    ds_tracks_list.append(ds_single_track)

ds_mpr_tracks_xr = xr.concat(ds_tracks_list, dim=pd.Index(tracks_subset.values, name='tracks'))

In [None]:
len(ds_tracks_list)

In [None]:
# sys.argv is a list, not a callable: display it instead of calling it
sys.argv

In [None]:
# Combine, for the first five tracks, the track statistics, the life-cycle
# phase indices and the ERA-5 rain-rate series into a single dataset.
first_five = slice(0, 5)
ds_sub = data_non2mcs_complete.isel(tracks=first_five)
ds_phase_sub = data_non2mcs_phase.isel(tracks=first_five)
ds_merged = xr.merge([ds_sub, ds_phase_sub, ds_mpr_tracks_xr])

In [None]:
ds_merged

In [None]:
# Plot, for the first merged track, total_rain divided by (area / 100) in black
# on the left axis and the ERA-5 mean rain rate in red on a twin right axis.
first_track = ds_merged.isel(tracks=0)
fig, ax = plt.subplots(1, 1)
rain_per_area = first_track.total_rain / (first_track.area / 100)
rain_per_area.plot(ax=ax, color='k')
ax2 = ax.twinx()
first_track.mean_total_rain_era5.plot(ax=ax2, color='r')

In [None]:
# Keep only the grid cells carrying the current track's cloud number, then
# interpolate that field onto the longitude/latitude coordinates of data_mpr.
track_only_mask = mcs_mask.where(mcs_mask == mcstracknumber)
mcs_boolen = track_only_mask.interp(lon=data_mpr.longitude, lat=data_mpr.latitude)

In [None]:
mcs_boolen.plot()

In [None]:
# Rebind mcs_mask to the cloudtracknumber field of the currently loaded mask
# dataset and display it.
# NOTE(review): unlike the assignment in the ERA-5 cell, the 'time' coordinate
# is not dropped here -- confirm this is intended before reusing mcs_mask.
mcs_mask = data_mcsmask.cloudtracknumber
mcs_mask

In [None]:
# Re-open the mask file for the last (year, month, day, hour) left over from
# the ERA-5 cell and display the raw dataset (no longitude conversion applied).
mask_pattern = '*{}{}{}_{}*.nc'.format(year,month,day,hour)
mask_matches = list(dir_mask.glob(mask_pattern))
file = mask_matches[0] # first file matching the given time
data_mcsmask = xr.open_dataset(file)
data_mcsmask