### NumPy-array version of the composite code, using multiprocessing

In [None]:
import sys
import os
import xarray as xr
from glob import glob
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
import cartopy.crs as ccrs
import cartopy.feature as cfeat
from cartopy.util import add_cyclic_point
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

import warnings
import psutil
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
from matplotlib import patches
from matplotlib import cm

from metpy import calc as mpcalc
from metpy.units import units

In [None]:
# multiple processing
import time
import multiprocessing

In [None]:
# Silence every warning for the rest of the session (this also hides numpy
# divide-by-zero / all-NaN-slice RuntimeWarnings raised by later cells).
warnings.filterwarnings('ignore')

In [None]:
# Change into the directory that holds the local helper module so the plain
# `import` below can resolve it; note this also changes the process-wide cwd.
os.chdir('/data2/willytsai/python_module/')
import SCAI_calc4obj as scai 

In [None]:
# Root directories of the input datasets; the subset_* helpers and the
# read_* functions append a sub-directory name to these before globbing.
ISCCP_dir = '/data/willytsai/ISCCP/hgg/'
TRMM_dir = '/data2/willytsai/TRMM_3hr/TRMM/'
MERRA2_dir = '/data2/willytsai/MERRA2/'
MERRA2_3d_dir = '/data/willytsai/'
ERA5_dir = '/data/willytsai/ERA5/'
Gridsat_dir = '/data/willytsai/gridsat_BT/remap_0.25deg/'

In [None]:
def event_region(lat_event,lon_event,lon_w,lon_e):
    """Return the indices of events whose longitude lies in [lon_w, lon_e].

    Both bounds are inclusive.  ``lat_event`` is accepted for signature
    symmetry but is not used in the selection.
    """
    west_ok = lon_event >= lon_w
    east_ok = lon_event <= lon_e
    inside = np.logical_and(west_ok, east_ok)
    return np.nonzero(inside)[0]

In [None]:
def subset_MERRA2(file_path,time_cent,lat_cent,lon_cent,days=3,lat_inc=5,lon_inc=5):
    """Extract a lat/lon box around an event from daily files and join them in time.

    Parameters
    ----------
    file_path : str
        Directory holding the daily files; the process cwd is changed to it
        (kept from the original, since the glob patterns are relative).
    time_cent : datetime.datetime
        Centre of the time window.
    lat_cent, lon_cent : float
        Centre of the box, in the files' own ``lat``/``lon`` coordinates.
    days : int
        Number of whole days fetched before and after ``time_cent``.
    lat_inc, lon_inc : float
        Half-widths of the box.

    Returns
    -------
    xarray.Dataset
        Box subset concatenated along ``time`` and cut to
        ``[time_cent - days, time_cent + days]``.

    Raises
    ------
    FileNotFoundError
        If no file name contains the ``YYYYMMDD`` stamp of a requested day.
    """
    os.chdir(file_path)

    lat_box = slice(lat_cent-lat_inc,lat_cent+lat_inc)
    lon_box = slice(lon_cent-lon_inc,lon_cent+lon_inc)

    pieces = []
    for offset in range(-days,days+1):
        date_str = (time_cent + timedelta(days=offset)).strftime('%Y%m%d')
        matches = sorted(glob('*'+date_str+'*'))
        if not matches:
            raise FileNotFoundError(
                'no file matching *{}* in {}'.format(date_str,file_path))
        # One file per day: take the first match, as before.  Load only the
        # small box into memory so the file handle can be released right away.
        with xr.open_dataset(matches[0]) as data:
            pieces.append(data.sel(lat=lat_box,lon=lon_box).load())

    # Single concatenation instead of re-copying the growing dataset per file.
    tmp = xr.concat(pieces,'time')

    # extract fetched time window
    half_window = timedelta(days=days)
    data_sub = tmp.sel(time=slice(time_cent-half_window,time_cent+half_window))

    return data_sub

In [None]:
def subset_TRMM(file_path,time_cent,lat_cent,lon_cent,days=3,lat_inc=5,lon_inc=5):
    """Extract a lat/lon box around an event from sub-daily TRMM files.

    Each file's timestamp is parsed from characters 5-15 of its name
    (``%Y%m%d.%H``) and attached as the ``time`` coordinate before the
    pieces are joined.

    Parameters
    ----------
    file_path : str
        Directory holding the files; the process cwd is changed to it
        (kept from the original, since file names are parsed relative to it).
    time_cent : datetime.datetime
        Centre of the time window.
    lat_cent, lon_cent : float
        Centre of the box, in the files' ``nlat``/``nlon`` coordinates.
    days : int
        Number of whole days fetched before and after ``time_cent``.
    lat_inc, lon_inc : float
        Half-widths of the box.

    Returns
    -------
    xarray.Dataset
        Box subset concatenated along ``time`` and cut to
        ``[time_cent - days, time_cent + days]``.

    Raises
    ------
    FileNotFoundError
        If no file matches any requested day (previously an
        UnboundLocalError).
    """
    os.chdir(file_path)

    lat_box = slice(lat_cent-lat_inc,lat_cent+lat_inc)
    lon_box = slice(lon_cent-lon_inc,lon_cent+lon_inc)

    pieces = []
    for offset in range(-days,days+1):
        date_str = (time_cent + timedelta(days=offset)).strftime('%Y%m%d')
        for file in sorted(glob('*'+date_str+'*')): # sub-daily
            # Load only the small box so the file handle can be released.
            with xr.open_dataset(file) as data:
                piece = data.sel(nlat=lat_box,nlon=lon_box).load()
            stamp = datetime.strptime(file[5:16],'%Y%m%d.%H')
            pieces.append(piece.assign_coords(time=stamp))

    if not pieces:
        raise FileNotFoundError(
            'no TRMM file within {} day(s) of {} in {}'.format(days,time_cent,file_path))

    # Single concatenation instead of re-copying the growing dataset per file.
    tmp = xr.concat(pieces,dim='time')

    # extract fetched time window
    half_window = timedelta(days=days)
    data_sub = tmp.sel(time=slice(time_cent-half_window,time_cent+half_window))

    return data_sub

In [None]:
def subset_gridsat(file_path,time_cent,lat_cent,lon_cent,days=3,lat_inc=5,lon_inc=5):
    """Extract a lat/lon box around an event from sub-daily GridSat files.

    Each file's timestamp is parsed from characters 11-23 of its name
    (``%Y.%m.%d.%H``) and attached as the ``time`` coordinate before the
    pieces are joined.

    Parameters
    ----------
    file_path : str
        Directory holding the files; the process cwd is changed to it
        (kept from the original, since file names are parsed relative to it).
    time_cent : datetime.datetime
        Centre of the time window.
    lat_cent, lon_cent : float
        Centre of the box, in the files' ``lat``/``lon`` coordinates.
    days : int
        Number of whole days fetched before and after ``time_cent``.
    lat_inc, lon_inc : float
        Half-widths of the box.

    Returns
    -------
    xarray.Dataset
        Box subset concatenated along ``time`` and cut to
        ``[time_cent - days, time_cent + days]``.

    Raises
    ------
    FileNotFoundError
        If no file matches any requested day (previously an
        UnboundLocalError).
    """
    os.chdir(file_path)

    lat_box = slice(lat_cent-lat_inc,lat_cent+lat_inc)
    lon_box = slice(lon_cent-lon_inc,lon_cent+lon_inc)

    pieces = []
    for offset in range(-days,days+1):
        date_str = (time_cent + timedelta(days=offset)).strftime('%Y.%m.%d')
        for file in sorted(glob('*'+date_str+'*')): # sub-daily
            # Load only the small box so the file handle can be released.
            with xr.open_dataset(file) as data:
                piece = data.sel(lat=lat_box,lon=lon_box).load()
            stamp = datetime.strptime(file[11:24],'%Y.%m.%d.%H')
            pieces.append(piece.assign_coords(time=stamp))

    if not pieces:
        raise FileNotFoundError(
            'no GridSat file within {} day(s) of {} in {}'.format(days,time_cent,file_path))

    # Single concatenation instead of re-copying the growing dataset per file.
    tmp = xr.concat(pieces,'time')

    # extract fetched time window
    half_window = timedelta(days=days)
    data_sub = tmp.sel(time=slice(time_cent-half_window,time_cent+half_window))

    return data_sub

In [None]:
def subset_ISCCP(file_path,time_cent,lat_cent,lon_cent,days=3,lat_inc=5,lon_inc=5):
    """Extract IR cloud amounts in a lat/lon box around an event from ISCCP files.

    For every matching file the first record of ``cldamt_ir`` and the first
    three entries of ``cldamt_irtypes`` (named low/mid/high here, following
    the original variable names) are taken, values above 100 are replaced
    by NaN, and the fields are joined along ``time``.

    Parameters
    ----------
    file_path : str
        Directory holding the files; the process cwd is changed to it
        (kept from the original, since the glob patterns are relative).
    time_cent : datetime.datetime
        Centre of the time window.
    lat_cent, lon_cent : float
        Centre of the box.  A negative ``lon_cent`` is shifted by +360 to
        match the 0-360 longitude convention noted by the original author.
    days : int
        Number of whole days fetched before and after ``time_cent``.
    lat_inc, lon_inc : float
        Half-widths of the box.

    Returns
    -------
    tuple of xarray.DataArray
        ``(call_sub, low_sub, mid_sub, high_sub)`` cut to
        ``[time_cent - days, time_cent + days]``.

    Raises
    ------
    FileNotFoundError
        If no file matches any requested day (previously an
        UnboundLocalError).
    """
    os.chdir(file_path)

    # -180,180 to ISCCP lon ranging from 0 to 360 (done once, not per file)
    if lon_cent < 0:
        lon_cent = 180 + (180 + lon_cent)
    # NOTE(review): a box straddling the 0/360 seam is not wrapped - confirm
    # that no event sits within lon_inc of the seam.
    lat_box = slice(lat_cent-lat_inc,lat_cent+lat_inc)
    lon_box = slice(lon_cent-lon_inc,lon_cent+lon_inc)

    def _valid(field):
        # keep values <= 100, everything else becomes NaN
        return field.where(field<=100,np.nan)

    call_parts, low_parts, mid_parts, high_parts = [], [], [], []
    for offset in range(-days,days+1):
        date_str = (time_cent + timedelta(days=offset)).strftime('%Y.%m.%d')
        for file in sorted(glob('*'+date_str+'*')):
            # Load only the needed slices so the file handle can be released.
            with xr.open_dataset(file) as data:
                box = data.sel(lat=lat_box,lon=lon_box)
                cldamt_call = box.cldamt_ir[0,:,:].load()
                cldamt_type = box.cldamt_irtypes
                cldamt_low = cldamt_type[0,0,:,:].load()
                cldamt_mid = cldamt_type[0,1,:,:].load()
                cldamt_high = cldamt_type[0,2,:,:].load()

            call_parts.append(_valid(cldamt_call))
            low_parts.append(_valid(cldamt_low))
            mid_parts.append(_valid(cldamt_mid))
            high_parts.append(_valid(cldamt_high))

    if not call_parts:
        raise FileNotFoundError(
            'no ISCCP file within {} day(s) of {} in {}'.format(days,time_cent,file_path))

    # extract fetched time window
    half_window = timedelta(days=days)
    window = slice(time_cent-half_window,time_cent+half_window)

    # One concatenation per variable instead of one per file.
    call_sub = xr.concat(call_parts,'time').sel(time=window)
    low_sub = xr.concat(low_parts,'time').sel(time=window)
    mid_sub = xr.concat(mid_parts,'time').sel(time=window)
    high_sub = xr.concat(high_parts,'time').sel(time=window)

    return call_sub,low_sub,mid_sub,high_sub

In [None]:
def SCAI_index(prec_sm,bt_sm,bt_cri=240,prec_cri=1,dx=0.25*100000,L=10*100000):
    """Compute object counts and SCAI per time step for BT and precipitation.

    Parameters
    ----------
    prec_sm, bt_sm : xarray.DataArray
        Fields indexed as ``[time, y, x]``; each 2-D slice is passed to
        ``scai.labeled_obj`` via ``.values``.
    bt_cri : float
        Threshold handed to ``scai.labeled_obj`` with ``flag=2`` (objects
        below the threshold, per the original inline comment; presumably in
        kelvin for brightness temperature - TODO confirm).
    prec_cri : float
        Threshold handed to ``scai.labeled_obj`` with ``flag=1`` (objects
        above the threshold, per the original inline comment).
    dx, L : float
        Forwarded unchanged to ``scai.SCAI_calc``; the defaults are the
        values that were hard-coded before.

    Returns
    -------
    tuple of numpy.ndarray
        ``(SCAI_BT, SCAI_p, N_psT, N_prec)``; the BT-based arrays have
        length ``bt_sm.shape[0]``, the precipitation-based ones
        ``prec_sm.shape[0]``.
    """
    def _per_step(field,cri,flag):
        # label objects in every time slice and score their aggregation
        n_steps = field.shape[0]
        index = np.zeros(n_steps)
        count = np.zeros(n_steps)
        for t in range(n_steps):
            label_obj, N = scai.labeled_obj(field[t,:,:].values,cri=cri,flag=flag)
            index[t] = scai.SCAI_calc(label_obj,N,dx=dx,L=L)
            count[t] = N
        return index, count

    SCAI_BT, N_psT = _per_step(bt_sm,bt_cri,2) # BT_based
    SCAI_p, N_prec = _per_step(prec_sm,prec_cri,1) # prec_based

    return (SCAI_BT,SCAI_p,N_psT,N_prec)

In [None]:
# %%time
# cldamt_type_3hr = np.nan*np.zeros((len(date),18,180,360))
# cldamt_3hr = np.nan*np.zeros((len(date),180,360))
# # ISCCP dataset 2014, 3hourly in a single file
# yr = ['2014','2015'] # year
# mn = ['01','02','03','04','05','06','07','08',
#      '09','10','11','12'] # month

# n=1
# for y in yr:
#     for m in mn:
#         os.chdir(ISCCP_dir+y+m)
#         files = sorted(glob('*.nc'))
        
#         for file in files:
#             data = xr.open_dataset(file).cldamt_types # cloud amount [%]
#             data2 = xr.open_dataset(file).cldamt
#             tmp = data; tmp2 = data2
#             cldamt_type_3hr[n-1,:,:,:] = tmp
#             cldamt_3hr[n-1,:,:] = tmp2
#             n+=1
# lat = tmp.lat; lon = tmp.lon
# print(n-1)
# cldamt_type_xr = xr.DataArray(cldamt_type_3hr,coords=[date,range(18),lat,lon],dims=['time','type','lat','lon']); del cldamt_type_3hr
# cldamt_xr = xr.DataArray(cldamt_3hr,coords=[date,lat,lon],dims=['time','lat','lon']); del cldamt_3hr

In [None]:
# lat = cldamt_type_xr.lat
# lon = cldamt_type_xr.lon
# # 0+3 shallow cumulus, 2+5 stratoform, 14+17 deep convection 
# cldamt_cu = (cldamt_type_xr[:,0,:,:]+cldamt_type_xr[:,3,:,:]).values
# cldamt_cu[cldamt_cu>100] = np.nan; cldamt_cu[cldamt_cu< 0] = np.nan;
# cu_xr = xr.DataArray(cldamt_cu,coords=[date,lat,lon],dims=['time','lat','lon']); del cldamt_cu

# cldamt_st = (cldamt_type_xr[:,13,:,:]+cldamt_type_xr[:,16,:,:]).values
# cldamt_st[cldamt_st>100] = np.nan; cldamt_st[cldamt_st< 0] = np.nan;
# st_xr = xr.DataArray(cldamt_st,coords=[date,lat,lon],dims=['time','lat','lon']); del cldamt_st

# cldamt_dc = (cldamt_type_xr[:,14,:,:]+cldamt_type_xr[:,17,:,:]).values
# cldamt_dc[cldamt_dc>100] = np.nan; cldamt_dc[cldamt_dc< 0] = np.nan;
# dc_xr = xr.DataArray(cldamt_dc,coords=[date,lat,lon],dims=['time','lat','lon']); del cldamt_dc

In [None]:
# load data
# Load the pre-computed event table from the ISCCP working directory.
os.chdir('/data/willytsai/ISCCP/')

# names of the columns stored in the event table
var_name = np.loadtxt('var_name_2014_2018_5deg_4ds.dat',dtype='U16')

# event times are stored as 'YYYY-MM-DD-HH' strings; parse them to datetimes
var_time = np.loadtxt('time_event_2014_2018_5deg_4ds.dat',dtype='U13')
time_event = [datetime.strptime(stamp,'%Y-%m-%d-%H') for stamp in var_time]

# one row per event, one column per variable
var_dataset = np.loadtxt('var_event_2014_2018_5deg_4ds.dat')

In [None]:
# precipitation events over specific basins: now IND
# Select events by longitude band.  The bounds used here (-180..180) keep
# every event; narrow them to restrict the composite to a single basin.
idx_REG = event_region(var_dataset[:,0],var_dataset[:,1],-180,180)

# event times restricted to the selected rows
time_event = np.asarray(time_event)[idx_REG]

# columns 0-18 of the event table, restricted to the selected rows
(lat_event, lon_event, prec_event, cwv_event, cwv_era_event,
 dc_event, cu_event, st_event, cldamt_event, olr_event,
 bt_event, Nps_event, NpsT_event, Nprec_event, SCAIbt_event,
 SCAIpr_event, cldfrac_event, precfrac_event, llws_event) = (
    var_dataset[idx_REG,col] for col in range(19))

Abarbt_event = np.sqrt(cldfrac_event/NpsT_event*1600) # pixels

In [None]:
# Positions (within the selected events) whose event precipitation is > 0.
t_org = np.where(prec_event>0)[0]

In [None]:
# Display the number of selected events.
len(t_org)

In [None]:
from tqdm import tqdm

In [None]:
# 3-day before and after for the time window 
T_org = np.zeros((len(t_org),49,42))
q_org = np.zeros((len(t_org),49,42))
#rh_org = np.zeros((len(t_org),49,72))
u_org = np.zeros((len(t_org),49,42))
v_org = np.zeros((len(t_org),49,42))
#omega_org = np.zeros((len(t_org),49,42))
#prec_org = np.zeros((len(t_org),49))
#precstd_org = np.zeros((len(t_org),49))
#N_org = np.copy(prec_org)
#SCAIbt_org = np.copy(prec_org)
#bt_org = np.copy(prec_org)
#llws_org = np.copy(prec_org)
# cwv_org = np.copy(prec_org)
# cwvstd_org = np.copy(prec_org)
# cwv_pbl_org = np.copy(prec_org)
# cwv_pblstd_org = np.copy(prec_org)
# T_pbl_org = np.copy(prec_org)
# T_pblstd_org = np.copy(prec_org)
# SF_org = np.copy(prec_org)
# cldfrac_org = np.copy(prec_org)

# dcamt_org = np.copy(prec_org)
# cuamt_org = np.copy(prec_org)
# stamt_org = np.copy(prec_org)

div_org = np.zeros((len(t_org),49,42))
# crh_org = np.copy(prec_org)

#cwvten_dyn_org = np.zeros((len(t_org),48))
#cwvten_phy_org = np.zeros((len(t_org),48))
#cwvten_ana_org = np.zeros((len(t_org),48))

#CFv_org = np.zeros((len(t_org),49,72))
#cin_org = np.copy(prec_org)
#cape_org = np.copy(prec_org)

In [None]:
from multiprocessing import Pool

In [None]:
# %%time
# for t in range(100):
#     data_sub2 = subset_TRMM(TRMM_dir+'2014_2018'
#                            ,time_event[t],lat_event[t],lon_event[t],days=3,
#                            lat_inc=2.5,lon_inc=2.5)                            
#     data_sub5 = subset_gridsat(Gridsat_dir+'2014_2018'
#                            ,time_event[t],lat_event[t],lon_event[t],days=3,
#                            lat_inc=2.5,lon_inc=2.5)
#     prec_sm = data_sub2.precipitation
#     bt_sm = data_sub5.irwin_cdr
#     SCAIbt_sm,SCAIp_sm,NpsT_sm,Nprec_sm = SCAI_index(prec_sm,bt_sm)
#     cldfrac_sm = [len(np.where(bt_sm[n,:,:] < 240)[0])/(len(bt_sm.lat)*len(bt_sm.lon)) for n in range(49)]
#     N_org = NpsT_sm
#     SCAIbt_org = SCAIbt_sm
#     cldfrac_org = np.asarray(cldfrac_sm)                            

In [None]:
%%time
def read_npall_TQUV(t):
    """Return the box-mean relative-humidity series for event ``t``.

    Worker for ``Pool.map``.  Despite its name it currently reads only the
    ``RH`` variable from the ``RH_3hr_p`` directory, over a window of 3 days
    and 2.5 degrees either side of the event; the full set of diagnostics
    (T, QV, U, V, precipitation, CWV, omega, BT, SCAI, CAPE/CIN, ...) is
    computed by ``read_npall``.

    Parameters
    ----------
    t : int
        Index into ``time_event`` / ``lat_event`` / ``lon_event``.

    Returns
    -------
    numpy.ndarray
        ``RH`` averaged (NaN-aware) over axis 2 and then over the following
        axis - presumably lat and lon of a (time, lev, lat, lon) field;
        TODO confirm the dimension order of the RH files.
    """
    rh_window = subset_MERRA2(MERRA2_3d_dir+'RH_3hr_p',
                              time_event[t],lat_event[t],lon_event[t],
                              days=3,lat_inc=2.5,lon_inc=2.5)

    # Two successive means (not one over both axes): the result differs from
    # a joint mean when NaNs are unevenly distributed, so the order is kept.
    first_mean = np.nanmean(rh_window.RH,axis=2)
    return np.nanmean(first_mean,axis=2)

if __name__ == '__main__':

    # The context manager tears the worker processes down even when a task
    # raises, instead of leaving them running after a failed map().
    # NOTE(review): tasks are the positions 0..len(t_org)-1, not the values
    # stored in t_org; the two agree only if every event has prec > 0 -
    # confirm.
    with Pool() as pool:
        time_result = pool.map(read_npall_TQUV, range(len(t_org)),chunksize=5)

In [None]:
# Display the number of selected events (should equal len(time_result)).
len(t_org)

In [None]:
# Display the shape of the first per-event result returned by the pool.
time_result[0].shape

In [None]:
# Stack the per-event results into one array with the event index first.
# Building the array here (instead of filling a pre-allocated `rh_org`)
# avoids a NameError when the allocation is commented out, and takes the
# trailing shape from the data rather than from a hard-coded level count.
rh_org = np.stack([np.asarray(result) for result in time_result])

### save output

In [None]:
# Write the composite arrays to the output directory as .npy files.
# NOTE(review): N_org, SCAIbt_org and cldfrac_org are not assigned at module
# level anywhere in the visible notebook, while rh_org (filled in the cell
# above) has its save commented out - confirm which outputs this run should
# write before executing this cell.
os.chdir('/w2-data/willytsai/ISCCP')

#np.save('T_all_5deg.mat.npy',T_org)
#np.save('q_all_5deg.mat.npy',q_org);#np.save('qverta_scat.mat.npy',qverta_scat)
#np.save('u_all_5deg.mat.npy',u_org);#np.save('qverta_scat.mat.npy',qverta_scat)
#np.save('v_all_5deg.mat.npy',v_org);#np.save('qverta_scat.mat.npy',qverta_scat)
#np.save('T_pbl_all_5deg.mat.npy',T_pbl_org);#np.save('qverta_scat.mat.npy',qverta_scat)
#np.save('T_pblstd_all_5deg.mat.npy',T_pblstd_org);#np.save('qverta_scat.mat.npy',qverta_scat)
np.save('N_all_5deg.mat.npy',N_org);#np.save('N_scat.mat.npy',N_scat)
np.save('SCAIbt_all_5deg.mat.npy',SCAIbt_org);#np.save('SCAIbt_scat.mat.npy',SCAIbt_scat)
#np.save('bt_all_5deg.mat.npy',bt_org);#np.save('bt_scat.mat.npy',bt_scat)
#np.save('llws_all_5deg.mat.npy',llws_org);#np.save('llws_scat.mat.npy',llws_scat)
#np.save('prec_all_5deg.mat.npy',prec_org);#np.save('prec_scat.mat.npy',prec_scat)
#np.save('precstd_all_5deg.mat.npy',precstd_org);#np.save('prec_scat.mat.npy',prec_scat)
#np.save('cwv_all_5deg.mat.npy',cwv_org);#np.save('cwv_scat.mat.npy',cwv_scat)
# np.save('dcamt_all.mat.npy',dcamt_org);#np.save('dcamt_scat.mat.npy',dcamt_scat)
# np.save('cuamt_all.mat.npy',cuamt_org);#np.save('cuamt_scat.mat.npy',cuamt_scat)
# np.save('stamt_all.mat.npy',stamt_org);#np.save('stamt_scat.mat.npy',stamt_scat)
#np.save('omega_all_5deg.mat.npy',omega_org);#np.save('omega_scat.mat.npy',omega_scat)
#np.save('rh_all_5deg.mat.npy',rh_org);#np.save('rh_scat.mat.npy',rh_scat)
#np.save('crh_all_5deg.mat.npy',crh_org);#np.save('rh_scat.mat.npy',rh_scat)
#np.save('cwvstd_all_5deg.mat.npy',cwvstd_org);#np.save('cwvstd_scat.mat.npy',cwvstd_scat)
#np.save('SF_all_5deg.mat.npy',SF_org);#np.save('SF_scat.mat.npy',SF_scat)
np.save('cldfrac_all_5deg.mat.npy',cldfrac_org);#np.save('cldfrac_scat.mat.npy',cldfrac_scat)
#np.save('CFv_all_5deg.mat.npy',CFv_org)
#np.save('div_all_5deg.mat.npy',div_org);#np.save('cldfrac_scat.mat.npy',cldfrac_scat)
#np.save('cwvten_dyn_all_5deg.mat.npy',cwvten_dyn_org);#np.save('cldfrac_scat.mat.npy',cldfrac_scat)
#np.save('cwvten_phy_all_5deg.mat.npy',cwvten_phy_org);#np.save('cldfrac_scat.mat.npy',cldfrac_scat)
#np.save('cwvten_ana_all_5deg.mat.npy',cwvten_ana_org);#np.save('cldfrac_scat.mat.npy',cldfrac_scat)
#np.save('cape_all_5deg.mat.npy',cape_org)
#np.save('cin_all_5deg.mat.npy',cin_org)
#np.save('cwv_pbl_all_5deg.mat.npy',cwv_pbl_org)
#np.save('cwv_pblstd_all_5deg.mat.npy',cwv_pblstd_org)

In [None]:
%%time
def read_npall(t):
    """Compute every composite diagnostic for event ``t``.

    Worker for ``Pool.map``.  All datasets are read over the same window
    (3 days and 2.5 degrees either side of the event), reduced to box means
    or box statistics, and returned as one tuple.

    Parameters
    ----------
    t : int
        Index into ``time_event`` / ``lat_event`` / ``lon_event``.

    Returns
    -------
    tuple
        ``(T_org, T_pbl_org, T_pblstd_org, q_org, u_org, v_org, N_org,
        SCAIbt_org, bt_org, llws_org, prec_org, precstd_org, cwv_org,
        cwv_pbl_org, cwv_pblstd_org, omega_org, rh_org, crh_org, cwvstd_org,
        SF_org, cldfrac_org, CFv_org, div_org, cwvten_dyn_org,
        cwvten_phy_org, cwvten_ana_org, cape_org, cin_org)``.
        ``CFv_org`` is always NaN (the cloud-fraction read is disabled).

    Notes
    -----
    The 3-D fields are assumed to be ordered (time, lev, lat, lon) and the
    2-D fields (time, lat, lon) - TODO confirm against the input files.
    """
    def _fetch(loader,path):
        # every dataset uses the same event-centred window
        return loader(path,time_event[t],lat_event[t],lon_event[t],
                      days=3,lat_inc=2.5,lon_inc=2.5)

    data_sub1 = _fetch(subset_MERRA2,MERRA2_3d_dir+'TQUV_3hr')
    data_sub2 = _fetch(subset_TRMM,TRMM_dir+'2014_2018')
    data_sub3 = _fetch(subset_MERRA2,MERRA2_dir+'inst_2d_hourly')
    data_sub4 = _fetch(subset_MERRA2,MERRA2_3d_dir+'omega_3hr')
    data_sub5 = _fetch(subset_gridsat,Gridsat_dir+'2014_2018')
    data_sub6 = _fetch(subset_MERRA2,MERRA2_3d_dir+'CWVtend_3hr')

    q_sm = data_sub1.QV
    T_sm = data_sub1.T
    u_sm = data_sub1.U
    v_sm = data_sub1.V
    prec_sm = data_sub2.precipitation
    cwv_sm = data_sub3.TQV.resample(time='3H').nearest()
    omega_sm = data_sub4.OMEGA
    bt_sm = data_sub5.irwin_cdr

    cwvten_dyn_sm = data_sub6.DQVDT_DYN.resample(time='3H').nearest()
    cwvten_phy_sm = data_sub6.DQVDT_PHY.resample(time='3H').nearest()
    cwvten_ana_sm = data_sub6.DQVDT_ANA.resample(time='3H').nearest()

    # Relative humidity is optional: fall back to an all-NaN placeholder when
    # it cannot be read.  `except Exception` (not a bare except) so that
    # KeyboardInterrupt / SystemExit still stop the worker.
    try:
        rh_sm = _fetch(subset_MERRA2,MERRA2_3d_dir+'RH_3hr').RH # relative humidity
    except Exception:
        rh_sm = np.full((49,72,21,17),np.nan)

    # number of time steps the per-step statistics are evaluated on
    steps = range(49)

    #==== calculated vars ====
    lev = T_sm.lev
    es = 6.1094*np.exp(17.625*(T_sm-273)/(T_sm-273+243.04))
    p_grid = (q_sm/q_sm)*lev # lev broadcast onto the shape of q_sm
    qs = es/(p_grid-es)*0.622
    # mass-weighted CRH, <CRH>
    crh_sm = (np.trapz(q_sm[:,:25,:,:],lev[:25],axis=1)
              /np.trapz(qs[:,:25,:,:],lev[:25],axis=1))

    # PBL variables
    cwv_pbl_sm = np.trapz(q_sm[:,:5,:,:],lev[:5],axis=1)*100 # [mm]
    cwv_pblstd_sm = [np.nanstd(cwv_pbl_sm[n,:,:]) for n in steps]
    T_pbl_sm = np.nanmean(T_sm[:,:5,:,:],axis=1) # mean T sfc to 900hpa
    T_pblstd_sm = [np.nanstd(T_pbl_sm[n,:,:]) for n in steps]

    llws_sm = np.sqrt((u_sm.sel(lev=900)-u_sm.sel(lev=700))**2 +
                  (v_sm.sel(lev=900)-v_sm.sel(lev=700))**2)

    SCAIbt_sm,SCAIp_sm,NpsT_sm,Nprec_sm = SCAI_index(prec_sm,bt_sm)

    n_omega_pts = len(omega_sm.lat)*len(omega_sm.lon)
    SF_sm = [len(np.where(omega_sm[n,:,:].sel(lev=500)>0)[0])/n_omega_pts for n in steps]
    cwvstd_sm = [np.nanstd(cwv_sm[n,:,:]) for n in steps]
    precstd_sm = [np.nanstd(prec_sm[n,:,:]) for n in steps]
    n_bt_pts = len(bt_sm.lat)*len(bt_sm.lon)
    cldfrac_sm = [len(np.where(bt_sm[n,:,:] < 240)[0])/n_bt_pts for n in steps]

    # divergence 3D
    div_sm = np.gradient(u_sm,0.625*100*1e3,axis=3)+np.gradient(v_sm,0.5*100*1e3,axis=2)

    # cape and cin
    p_prof = T_sm.lev.values*units('mbar')
    cape_sm = np.zeros(len(T_sm.time))
    cin_sm = np.copy(cape_sm)
    # `it` (not `t`) so the event index is not overwritten by the time loop
    for it in range(len(T_sm.time)):
        T_tmp = np.nanmean(T_sm[it,:,:,:],axis=(1,2))
        q_tmp = np.nanmean(q_sm[it,:,:,:],axis=(1,2))
        T = (T_tmp-273.15)*units('degC')
        q = q_tmp*units('kg/kg')
        Td = mpcalc.dewpoint(mpcalc.vapor_pressure(p_prof,q))

        # A failed parcel calculation leaves NaN for this time step.
        try:
            Tp = mpcalc.parcel_profile(p_prof,T[0],Td[0]).to('degC')
            el_pressure,_ = mpcalc.el(p_prof,T,Td) # equilibrium level
            el_idx = np.argmin(np.abs(p_prof.magnitude - el_pressure.magnitude))
            [CAPE,CIN] = mpcalc.cape_cin(p_prof[:el_idx],T[:el_idx],Td[:el_idx],Tp[:el_idx])
            cape_sm[it] = CAPE.magnitude
            cin_sm[it] = CIN.magnitude
        except Exception:
            cape_sm[it] = np.nan
            cin_sm[it] = np.nan

    #=========================

    def _mean_last_two(field):
        # 4-D field: mean over axis 2, then over the next axis
        return np.nanmean(np.nanmean(field,axis=2),axis=2)

    def _mean_box(field):
        # 3-D field: mean over axis 1, then over the next axis
        return np.nanmean(np.nanmean(field,axis=1),axis=1)

    T_org = _mean_last_two(T_sm)
    T_pbl_org = _mean_box(T_pbl_sm)
    T_pblstd_org = np.asarray(T_pblstd_sm)
    q_org = _mean_last_two(q_sm)
    u_org = _mean_last_two(u_sm)
    v_org = _mean_last_two(v_sm)
    N_org = NpsT_sm
    SCAIbt_org = SCAIbt_sm
    bt_org = _mean_box(bt_sm)
    llws_org = _mean_box(llws_sm)
    prec_org = _mean_box(prec_sm)
    precstd_org = np.asarray(precstd_sm)
    cwv_org = _mean_box(cwv_sm)
    cwv_pbl_org = _mean_box(cwv_pbl_sm)
    cwv_pblstd_org = np.asarray(cwv_pblstd_sm)
    omega_org = _mean_last_two(omega_sm)
    rh_org = _mean_last_two(rh_sm)
    crh_org = _mean_box(crh_sm)
    cwvstd_org = np.asarray(cwvstd_sm)
    SF_org = np.asarray(SF_sm)
    cldfrac_org = np.asarray(cldfrac_sm)

    # The vertical cloud-fraction read (Cloudfrac_3hr) is disabled; a NaN
    # placeholder keeps the tuple layout stable.
    CFv_org = np.nan

    div_org = _mean_last_two(div_sm)
    cwvten_dyn_org = _mean_box(cwvten_dyn_sm)
    cwvten_phy_org = _mean_box(cwvten_phy_sm)
    cwvten_ana_org = _mean_box(cwvten_ana_sm)
    cape_org = cape_sm
    cin_org = cin_sm

    return (T_org,T_pbl_org,T_pblstd_org,q_org,u_org,v_org,N_org,SCAIbt_org,
            bt_org,llws_org,prec_org,precstd_org,cwv_org,cwv_pbl_org,cwv_pblstd_org,
            omega_org,rh_org,crh_org,cwvstd_org,SF_org,cldfrac_org,CFv_org,div_org,
            cwvten_dyn_org,cwvten_phy_org,cwvten_ana_org,cape_org,cin_org)

if __name__ == '__main__':

    # Trial run on the first 20 events.  The context manager tears the
    # workers down even when a task raises.
    with Pool() as pool:
        time_result = pool.map(read_npall, range(20))

In [None]:
%%time
def read_np(num):
    """Return the box-mean ``T`` series for event ``num``.

    Reads the ``TQUV_3hr`` window (3 days and 2.5 degrees either side of the
    event) and averages ``T`` (NaN-aware) over axis 2 and then over the
    following axis - presumably lat and lon; TODO confirm.
    """
    window = subset_MERRA2(MERRA2_3d_dir+'TQUV_3hr',
                           time_event[num],lat_event[num],lon_event[num],
                           days=3,lat_inc=2.5,lon_inc=2.5)
    first_mean = np.nanmean(window.T,axis=2)
    return np.nanmean(first_mean,axis=2)

def read_np2(num):
    """Return the box-mean ``QV`` series for event ``num``.

    Reads the ``TQUV_3hr`` window (3 days and 2.5 degrees either side of the
    event) and averages ``QV`` (NaN-aware) over axis 2 and then over the
    following axis - presumably lat and lon; TODO confirm.
    """
    window = subset_MERRA2(MERRA2_3d_dir+'TQUV_3hr',
                           time_event[num],lat_event[num],lon_event[num],
                           days=3,lat_inc=2.5,lon_inc=2.5)
    first_mean = np.nanmean(window.QV,axis=2)
    return np.nanmean(first_mean,axis=2)
    
if __name__ == '__main__':

    # Both maps share one pool; the context manager tears the workers down
    # even when the first map raises.
    # NOTE(review): the task range is hard-coded to the first 10000 events
    # and fails with IndexError if fewer exist - confirm.
    with Pool(processes=None) as pool:
        time_result = pool.map(read_np, range(0,10000),chunksize=500)
        time2_result = pool.map(read_np2, range(0,10000),chunksize=500)

In [None]:
# Unwrap every pooled result into its underlying numpy array.
test = [result.values for result in time_result]

#test2=[]
#for i in time2_result:
#    test2.append(i)

In [None]:
# Stack the QV results into one array (this overwrites the list built above).
test = np.asarray(time2_result)

In [None]:
# Display the shape of the stacked results.
test.shape

In [None]:
# Display the first ten event times.
time_event[:10]

In [None]:
# NOTE(review): `result1` is not defined anywhere in the visible notebook;
# presumably left over from an earlier session - confirm or remove.
result1[24][0]

In [None]:
# NOTE(review): `result2` is not defined anywhere in the visible notebook;
# presumably left over from an earlier session - confirm or remove.
result2[24][1]

In [None]:
# Display the time of event 24 (the event inspected in the cells above).
time_event[24]

In [None]:
%%time
# Serial version of read_np / read_np2 for the first 100 events, timed by the
# cell magic above for comparison with the pooled runs.
# NOTE(review): `test_array` and `test2_array` are not allocated anywhere in
# the visible notebook - they must exist before this cell is run.
for num in range(0,100):
    data_sub1 = subset_MERRA2(MERRA2_3d_dir+'TQUV_3hr'
                          ,time_event[num],lat_event[num],lon_event[num],days=3
                          ,lat_inc=2.5,lon_inc=2.5)
    # box mean of T: NaN-aware mean over axis 2, then over the next axis
    T_sm = data_sub1.T
    tmp = np.nanmean(np.nanmean(T_sm,axis=2),axis=2)
    
    # same reduction for QV
    q_sm = data_sub1.QV
    tmp2 = np.nanmean(np.nanmean(q_sm,axis=2),axis=2)
    
    test_array[num,:,:] = tmp
    test2_array[num,:,:] = tmp2

In [None]:
# Write every composite array to the ISCCP working directory as .npy files.
# The last two saves previously passed `cwv_pbl_all.mat.npy` / `cwv_pblstd_all.`
# as the array argument (the latter is a SyntaxError); they now save the
# `*_org` arrays, matching every other line here.
# NOTE(review): most `*_org` names are only locals of read_npall in the
# visible notebook - they must be assembled at module level before this runs.
os.chdir('/data/willytsai/ISCCP')
np.save('T_all.mat.npy',T_org)
np.save('q_all.mat.npy',q_org)
np.save('u_all.mat.npy',u_org)
np.save('v_all.mat.npy',v_org)
np.save('N_all.mat.npy',N_org)
np.save('SCAIbt_all.mat.npy',SCAIbt_org)
np.save('bt_all.mat.npy',bt_org)
np.save('llws_all.mat.npy',llws_org)
np.save('prec_all.mat.npy',prec_org)
np.save('precstd_all.mat.npy',precstd_org)
np.save('cwv_all.mat.npy',cwv_org)
# np.save('dcamt_all.mat.npy',dcamt_org)
# np.save('cuamt_all.mat.npy',cuamt_org)
# np.save('stamt_all.mat.npy',stamt_org)
np.save('omega_all.mat.npy',omega_org)
np.save('rh_all.mat.npy',rh_org)
np.save('crh_all.mat.npy',crh_org)
np.save('cwvstd_all.mat.npy',cwvstd_org)
np.save('SF_all.mat.npy',SF_org)
np.save('cldfrac_all.mat.npy',cldfrac_org)
np.save('CFv_all.mat.npy',CFv_org)
np.save('div_all.mat.npy',div_org)
np.save('cwvten_dyn_all.mat.npy',cwvten_dyn_org)
np.save('cwvten_phy_all.mat.npy',cwvten_phy_org)
np.save('cwvten_ana_all.mat.npy',cwvten_ana_org)
np.save('cape_all.mat.npy',cape_org)
np.save('cin_all.mat.npy',cin_org)
np.save('cwv_pbl_all.mat.npy',cwv_pbl_org)
np.save('cwv_pblstd_all.mat.npy',cwv_pblstd_org)

### load cases org and scat

In [None]:
# Reload the saved composite arrays from the ISCCP working directory.
os.chdir('/data/willytsai/ISCCP')
T_all = np.load('T_all.mat.npy')
u_all = np.load('u_all.mat.npy')
v_all = np.load('v_all.mat.npy')
q_all = np.load('q_all.mat.npy')
N_all = np.load('N_all.mat.npy')
SCAIbt_all = np.load('SCAIbt_all.mat.npy')
bt_all = np.load('bt_all.mat.npy')
llws_all = np.load('llws_all.mat.npy')
prec_all = np.load('prec_all.mat.npy')
precstd_all = np.load('precstd_all.mat.npy')
cwv_all = np.load('cwv_all.mat.npy')
crh_all = np.load('crh_all.mat.npy')
# dcamt_all = np.load('dcamt_all.mat.npy')
# cuamt_all = np.load('cuamt_all.mat.npy')
# stamt_all = np.load('stamt_all.mat.npy')
omega_all = np.load('omega_all.mat.npy')
rh_all = np.load('rh_all.mat.npy')
cwvstd_all = np.load('cwvstd_all.mat.npy')
SF_all = np.load('SF_all.mat.npy')
cldfrac_all = np.load('cldfrac_all.mat.npy')
CFv_all = np.load('CFv_all.mat.npy')
div_all  = np.load('div_all.mat.npy')
cwvten_dyn_all = np.load('cwvten_dyn_all.mat.npy')
cwvten_phy_all = np.load('cwvten_phy_all.mat.npy')
cwvten_ana_all = np.load('cwvten_ana_all.mat.npy')
# total tendency = sum of the three loaded tendency terms
cwvten_tot_all = cwvten_dyn_all+cwvten_phy_all+cwvten_ana_all
# NOTE(review): the event-level counterpart `Abarbt_event` takes the square
# root of this same ratio; confirm whether the sqrt is intentionally omitted
# here.  Entries where N_all == 0 become inf/NaN (warnings are silenced).
Abar_all = cldfrac_all/N_all*1600
cape_all = np.load('cape_all.mat.npy')
cin_all = np.load('cin_all.mat.npy')