In [1]:
import os
import sys
import xarray as xr
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from datetime import datetime
from pathlib import Path
from itertools import combinations

import cartopy.crs as ccrs
import cartopy.feature as cfeat
from cartopy.util import add_cyclic_point
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

import warnings

In [2]:
warnings.filterwarnings('ignore')

In [3]:
# Names of the 18 feature IDs (FIDs). The list is ordered so that position i
# holds the name of integer label i + 1 (AR -> 1, ..., CG -> 18), which is how
# the cells below pair labels with names.
FID_label = [
    'AR', 'FT', 'MCS', 'LPS',              # IDs 1-4
    'AF', 'AM', 'AL', 'FM', 'FL', 'ML',    # IDs 5-10
    'AFM', 'AFL', 'AML', 'FML',            # IDs 11-14
    'All',                                 # ID 15
    'UE', 'DC', 'CG',                      # IDs 16-18
]

In [93]:
def process_data_fid_update(data_fid_mask, front_replace=False):
    """Update the original FID labels by adding DC (17) and CG (18).

    Pixels labelled UE (16) are promoted to DC (17) where ``deep_conv_mask == 1``
    and to CG (18) where ``nondeep_conv_mask == 1``.

    Parameters
    ----------
    data_fid_mask : dataset-like
        Must provide ``feat_comb_label``, ``deep_conv_mask`` and
        ``nondeep_conv_mask`` (documented upstream as (time, lat, lon)).
        ``feat_comb_label`` needs a ``latitude`` coordinate when
        ``front_replace`` is true.
    front_replace : bool, default False
        If true, every label that contains a front (FT, AF, FM, FL, AFM, AFL,
        FML, All) is rewritten to its front-free counterpart (UE, AR, MCS, LPS,
        AM, AL, ML, AML) for -20 < latitude < 20, *before* DC/CG are added.
        Labels at |latitude| >= 20 are left untouched.

    Returns
    -------
    Labels with the same layout as ``feat_comb_label``, ranging from 1 to 18
    (0 where the input was 0 or, with ``front_replace``, where the result
    exceeded 18).
    """

    data_fid_label = data_fid_mask.feat_comb_label
    data_dc_mask = data_fid_mask.deep_conv_mask  # (0,1)
    # shifted to (1,2) so that a congestus hit contributes +2 (16 -> 18)
    data_cg_mask = data_fid_mask.nondeep_conv_mask + 1

    if front_replace:

        ### ad-hoc approach to mask out "FT" and the associated labels over the tropics
        FID_label = ['AR','FT','MCS','LPS','AF','AM','AL','FM','FL','ML','AFM'
                     ,'AFL','AML','FML','All','UE','DC','CG']
        # front-bearing label -> the same combination with the front removed
        front_to_replacement = {'FT': 'UE', 'AF': 'AR', 'FM': 'MCS', 'FL': 'LPS',
                                'AFM': 'AM', 'AFL': 'AL', 'FML': 'ML', 'All': 'AML'}

        cond_tropics = (data_fid_label.latitude < 20) & (data_fid_label.latitude > -20)

        # Overwrite (rather than add to) the label of each tropical front pixel so
        # that every pixel carries exactly one ID. Summing the replacement onto the
        # untouched tropical field would give e.g. FT (2) + UE (16) = 18, i.e. CG.
        # The test is always made against the *original* labels, so a value written
        # by one replacement can never be picked up by a later one.
        data_fid_update = data_fid_label
        for front_name, replace_name in front_to_replacement.items():
            label_F = FID_label.index(front_name) + 1
            label_replace = FID_label.index(replace_name) + 1
            is_tropical_front = (data_fid_label == label_F) & cond_tropics
            data_fid_update = data_fid_update.where(~is_tropical_front, label_replace)

    else:
        data_fid_update = data_fid_label

    # final step: adding DC & CG on top of UE (16) pixels only
    data_dc_label = data_dc_mask.where((data_dc_mask == 1) & (data_fid_update == 16), 0)
    data_cg_label = data_cg_mask.where((data_cg_mask == 2) & (data_fid_update == 16), 0)

    data_fid_final = data_fid_update + data_dc_label + data_cg_label  # now ranges from 1 to 18

    if front_replace:
        # Safety net kept from the previous implementation: anything above 18
        # (only reachable when a UE pixel is flagged as both DC and CG) is set to 0.
        data_fid_final = data_fid_final.where(data_fid_final <= 18, 0)

    return data_fid_final

In [97]:
%%time

year = 2004

data_dir = Path('/neelin2020/RGMA_feature_mask/data_product/{}/MERGED_FP'.format(year))

# precipitation: all '*_expand.nc' files of the year, restricted to the 0.25-degree longitudes
files = sorted(list(data_dir.glob('*_expand.nc')))
data_FP_merged = xr.open_mfdataset(files)
data_gpm = data_FP_merged.precipitationCal.sel(longitude=np.arange(0,360,0.25))

# add post-defined deep convection and congestus
files = sorted(list(data_dir.glob('*_convmask.nc')))
data_fid_mask = xr.open_mfdataset(files)
# NOTE(review): the two masks below are not used further down in this cell
# (process_data_fid_update derives them itself); kept in case other cells read them.
data_dc_mask = data_fid_mask.deep_conv_mask # (0,1)
data_cg_mask = data_fid_mask.nondeep_conv_mask + 1 # now is (1,2) instead for later screening processes

data_fid_update = process_data_fid_update(data_fid_mask, front_replace=True)

# update data_gpm: some minor mismatches, so make sure the sum of total explained equals to 1
data_gpm_tmatch = data_gpm.sel(time=data_fid_update.time)
data_gpm_tmatch = data_gpm_tmatch.where(data_fid_update > 0, 0)

# The denominator is the same for every FID: evaluate it once instead of
# re-reading and re-summing the whole year inside each loop iteration.
pcp_total = data_gpm_tmatch.sum('time').compute()

explained_pcp_list = []

for fid, _ in enumerate(FID_label, start=1): # 1-18 for defined FIDs

    # extract only pixels associated with the specified FID
    pcp_fid = data_gpm_tmatch.where(data_fid_update == fid, 0)

    # how much rain amount explained by specific type
    # (grid points with zero total precipitation yield NaN)
    explained_precip = (pcp_fid.sum('time')/pcp_total).compute()
    explained_pcp_pid = explained_precip.to_dataset().rename_vars({'precipitationCal':'pcp_explained'})
    explained_pcp_list.append(explained_pcp_pid)

explained_pcp_xr = xr.concat(explained_pcp_list, pd.Index(FID_label, name='feature_id'))

In [111]:
%%time

year_list = np.arange(2001,2020) # 2001-2019

explained_pcp_multiyr = []

for year in year_list:

    print('processing year: {}'.format(year))

    data_dir = Path('/neelin2020/RGMA_feature_mask/data_product/{}/MERGED_FP'.format(year))

    # precipitation: all '*_expand.nc' files of the year, restricted to the 0.25-degree longitudes
    files = sorted(list(data_dir.glob('*_expand.nc')))
    data_FP_merged = xr.open_mfdataset(files)
    data_gpm = data_FP_merged.precipitationCal.sel(longitude=np.arange(0,360,0.25))

    files = sorted(list(data_dir.glob('*_convmask.nc')))
    data_fid_mask = xr.open_mfdataset(files)

    # add post-defined deep convection and congestus and replace FT over tropics
    data_fid_update = process_data_fid_update(data_fid_mask, front_replace=True)

    # update data_gpm: some minor mismatches, so make sure the sum of total explained equals to 1
    data_gpm_tmatch = data_gpm.sel(time=data_fid_update.time)
    data_gpm_tmatch = data_gpm_tmatch.where(data_fid_update > 0, 0)

    # The denominator is the same for every FID of this year: evaluate it once
    # instead of re-reading and re-summing the whole year for each of the 18 IDs.
    pcp_total = data_gpm_tmatch.sum('time').compute()

    explained_pcp_list = []

    for fid, _ in enumerate(FID_label, start=1): # 1-18 for defined FIDs

        # extract only pixels associated with the specified FID
        pcp_fid = data_gpm_tmatch.where(data_fid_update == fid, 0)

        # how much rain amount explained by specific type
        # (grid points with zero total precipitation yield NaN)
        explained_precip = (pcp_fid.sum('time')/pcp_total).compute()
        explained_pcp_pid = explained_precip.to_dataset().rename_vars({'precipitationCal':'pcp_explained'})
        explained_pcp_list.append(explained_pcp_pid)

    # one dataset per year, stacked along a new 'feature_id' dimension
    explained_pcp_multiyr.append(xr.concat(explained_pcp_list, pd.Index(FID_label, name='feature_id')))

# stack the yearly results along a new 'year' dimension and persist them
explained_pcp_multiyr_xr = xr.concat(explained_pcp_multiyr, pd.Index(year_list, name='year'))
explained_pcp_multiyr_xr.to_netcdf('/neelin2020/RGMA_feature_mask/data_product/multi_year_stats/explained_pcp_2001_2019_RmTroFT.nc')

processing year: 2001
processing year: 2002
processing year: 2003
processing year: 2004
processing year: 2005
processing year: 2006
processing year: 2007
processing year: 2008
processing year: 2009
processing year: 2010
processing year: 2011
processing year: 2012
processing year: 2013
processing year: 2014
processing year: 2015
processing year: 2016
processing year: 2017
processing year: 2018
processing year: 2019
CPU times: user 7h 21min 29s, sys: 10h 40min 43s, total: 18h 2min 12s
Wall time: 3h 12min 28s


In [None]:
# Show the multi-year result (concatenated over 'year' and 'feature_id') as the cell output.
explained_pcp_multiyr_xr