#### RGMA feature-precipitation-environment dataset 
- a prototype of dataset generation using a module
- standard input data indicating the location of the track or object
- standard 2-D / 3-D output
- a showcase for TC tracks 

In [None]:
import os
import sys
import xarray as xr
import numpy as np
import matplotlib
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from datetime import datetime
from pathlib import Path
import cartopy.crs as ccrs
import warnings

In [None]:
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

#### load feature-environment module v1.0
- load the module including class "ds_feature_environment"

In [None]:
# Make the test workspace the current directory
# (presumably so feature_environment_module can be imported from here — confirm).
os.chdir('/scratch/wmtsai/featenv_test/')

In [None]:
from feature_environment_module import *
import json

In [None]:
# read feature_list.json and variable_list.json as inputs

main_dir = Path('/scratch/wmtsai/featenv_test/')

# Context managers close each file as soon as it has been parsed.
with open(main_dir / 'feature_list.json') as feature_json:
    feature_settings = json.load(feature_json)
# NOTE(review): the file name is spelled 'varible_list.json' (no second 'a');
# kept as-is because it may match the file on disk — confirm.
with open(main_dir / 'varible_list.json') as variable_json:
    variable_settings = json.load(variable_json)

In [None]:
# call the feature-environment module
featenv = ds_feature_environment()
print('version: ', featenv.__version__)

# All settings below come from the first entry of the 'feature' list.
feature_cfg = feature_settings['feature'][0]
featenv.name = feature_cfg['name']
featenv.feature_data_sources = feature_cfg['feature_sources']
featenv.environmental_data_sources = feature_cfg['feature_environment_sources']
featenv.track_frequency = feature_cfg['track_frequency']
# NOTE(review): env_frequency is read from the 'track_frequency' key, same as
# the line above — confirm this is intended and not a copy-paste slip.
featenv.env_frequency = feature_cfg['track_frequency']
# NOTE(review): eval() executes whatever string the JSON file holds.
# Presumably the values are 'True'/'False'; consider ast.literal_eval or real
# JSON booleans if the settings file can come from an untrusted source.
featenv.feature_track = eval(feature_cfg['is_feature_track'])
featenv.feature_mask = eval(feature_cfg['is_feature_mask'])
featenv.lon_env = np.arange(0,360,0.25)
featenv.lat_env = np.arange(-90,90.25,0.25)

year_process = 2014

# create directories according to the above descriptions
main_dir = '/scratch/wmtsai/featenv_test/{}/{}/'.format(featenv.name, year_process)
featenv.create_featenv_directory(main_dir)

# locate and read the preprocessed track file
featenv.track_data = xr.open_dataset(feature_cfg['track_data'])

# check dimensions: coords=[ tracks, time ] variables=[ base_time, meanlat, meanlon ]
coords_track = list(featenv.track_data.dims)
vars_track = list(featenv.track_data.keys())

required_coords = {'tracks', 'time'}
required_vars = {'base_time', 'meanlon', 'meanlat'}
# Extra dimensions/variables are allowed; only the required ones must exist.
if required_coords <= set(coords_track) and required_vars <= set(vars_track):
    print('Track data...ready')
else:
    sys.exit('Incorret input format...Check the input file') # exit due to an incorrect track file

(featenv.track_data).to_netcdf(featenv.track_dir / 'track_geoinfo.nc')

In [None]:
# Bare expression: shows featenv.variable_format as the cell output.
featenv.variable_format

In [None]:
# Example timestamp (2014-01-20 12h) as zero-padded strings.
year = '{:04d}'.format(2014)
month = '{:02d}'.format(1)
day = '{:02d}'.format(20)
hour = '{:02d}'.format(12)

var = 'T'

data_str = featenv.variable_format[var]
data_dir = Path(str(featenv.locate_env_data[var]))

# Fill the placeholders of the default file string one after another;
# the order of the substitutions is kept because later tokens could
# otherwise match text inserted by earlier ones.
tmp = data_str
for token, value in (('X', var), ('YYYY', year), ('MM', month),
                     ('DD', day), ('HH', hour)):
    tmp = tmp.replace(token, value)

# Files are looked up in a per-year subdirectory of the data directory.
filename = data_dir / year / tmp

filename

In [None]:
# Bare expression: shows featenv.env3d_dir as the cell output.
featenv.env3d_dir

In [None]:
# Read the preprocessed track file through the module's loader.
track_dir = Path('/neelin2020/TempestExtremes/TC')
track_file = track_dir / 'ERA5_TC_tracks_2014.nc'
track_data = featenv.load_track_data(track_file)

# Keep only the first 10 tracks as examples.
featenv.track_data = track_data.isel(tracks=slice(10))

# Store the subset under feature_catalogs/track/.
geoinfo_file = featenv.track_dir / '{}_geoinfo.2014.nc'.format(featenv.name)
featenv.track_data.to_netcdf(geoinfo_file)

In [None]:
%%time
# extract feat-env data for individual tracks
# Only the first 3 tracks are processed as an example; the same ids label the
# 'tracks' dimension of the concatenated result, so they are selected once.
track_ids = featenv.track_data.tracks.values[:3]

ds_merged = []
for track in track_ids:
    ds_env_vars = featenv.get_environment_vars_track(track_id=track, lat_range=15, lon_range=15)
    ds_feat_vars = featenv.get_feature_vars_track(track_id=track, lat_range=15, lon_range=15)
    # compat='override' skips the equality check on shared variables
    # (some float differences between the two datasets. TBD)
    ds_vars = xr.merge([ds_env_vars, ds_feat_vars], compat='override')
    ds_merged.append(ds_vars)
ds_merged_xr = xr.concat(ds_merged, dim=pd.Index(track_ids, name='tracks'))

In [None]:
%%time
# save feature and environmental variables accordingly
for var in ds_merged_xr.keys():

    if var == 'base_time':
        continue

    ds = ds_merged_xr[var]
    if len(ds.dims) <= 2:
        # Neither catalog applies to variables with two or fewer dimensions.
        # Skip them explicitly instead of reusing the output directory left
        # over from a previous iteration (or failing on an unbound name).
        print('skip {}: not a 2-D/3-D environmental field'.format(var))
        continue

    # Fields with a 'level' dimension go to the 3-D catalog, the rest to 2-D.
    out_dir = featenv.env3d_dir if 'level' in ds.dims else featenv.env2d_dir

    print(out_dir)
    ds.to_netcdf(out_dir / '{}_{}_merged.nc'.format(featenv.name, var), encoding={var: {'dtype': 'float32'}})
    print('save file: {}_{}_merged.nc'.format(featenv.name, var))

#### showcase 

In [None]:
def TC_env_plot(featenv_name, year, track_id):
    """
    Plot a 3 x 3 panel of Tb, precipitation and mean sea-level pressure
    along one track.

    When the track has at least 4 time steps on each side of its minimum
    ``slp``, nine time steps are drawn: 4 before the minimum, the minimum
    itself (centre panel, outlined in red) and 4 after it.  Otherwise the
    valid part of the track is drawn from its first time step onward, and
    panels without a matching time step are hidden.

    Parameters
    ----------
    featenv_name : str
        Feature name used in the catalog directory and in the file names.
    year : int or str
        Year used in the catalog directory, the track file name and the title.
    track_id : int
        Positional index along the ``tracks`` dimension (passed to ``isel``).

    Returns
    -------
    xarray.Dataset
        Environmental fields (tb, precipitation, msl) at the selected
        time steps.
    """

    # figure configuration 3 x 3; ravel() yields the panels row by row
    fig, axes = plt.subplots(3, 3, figsize=(6, 6))
    panels = axes.ravel()
    spine_sides = ('bottom', 'top', 'left', 'right')

    catalog_root = Path('/scratch/wmtsai/featenv_test/{}/{}'.format(featenv_name, year))
    track_dir = catalog_root / 'feature_catalogs/track'
    env2d_dir = catalog_root / 'environment_catalogs/VARS_2D'

    # The track file name follows the requested year instead of a fixed 2014.
    ds = xr.open_dataset(track_dir / '{}_geoinfo.{}.nc'.format(featenv_name, year))
    ds_track = ds.isel(tracks=track_id)

    var2d_list = ['tb', 'precipitation', 'msl']
    ds_env = xr.merge([
        xr.open_dataset(env2d_dir / '{}_{}_merged.nc'.format(featenv_name, var)).isel(tracks=track_id)
        for var in var2d_list
    ])

    # split the entire track into 9 timesteps: 4 before SLP_min and 4 after
    idt_slpmin = int(ds_track.slp.argmin('time').item())
    base_time = ds_track.base_time.values
    # The first NaT marks the end of the track; a track that fills the whole
    # time axis has no NaT, so fall back to the axis length.
    nat_index = np.flatnonzero(np.isnat(base_time))
    idx_end = int(nat_index[0]) if nat_index.size else base_time.size
    total_length = idx_end - 1  # index of the last valid time step

    centered = (idt_slpmin >= 4) and ((total_length - idt_slpmin) >= 4)
    if centered:
        time_sel = np.concatenate((np.linspace(0, idt_slpmin, 5, dtype='int'),
                                   np.linspace(idt_slpmin, total_length, 5, dtype='int')[1:]))
        ds_track_subtime = ds_track.isel(time=time_sel)
        ds_env_subtime = ds_env.isel(time=time_sel)
    else:
        # Too short to centre on the SLP minimum: use the valid time steps.
        valid_steps = slice(0, idx_end)
        ds_track_subtime = ds_track.isel(time=valid_steps)
        ds_env_subtime = ds_env.isel(time=valid_steps)

    n_steps = min(ds_track_subtime.sizes['time'], ds_env_subtime.sizes['time'])

    for t, ax in enumerate(panels):
        if t >= n_steps:
            # no time step left for this panel
            ax.set_visible(False)
            continue

        dst = ds_track_subtime.isel(time=t)
        dse = ds_env_subtime.isel(time=t)
        # Tb
        ax.pcolormesh(dse.x, dse.y, dse.tb, vmin=220, vmax=280, cmap='Blues')
        # precip.
        ax.contour(dse.x, dse.y, dse.precipitation, levels=[5,10,15], colors=['m'], linewidths=1)
        # surface pressure (msl divided by 100 before contouring)
        cf = ax.contour(dse.x, dse.y, dse.msl/100, levels=np.arange(980,1012,4), colors=['green'], linewidths=1)
        ax.clabel(cf, inline=None, fontsize=5)
        # TODO: u,v wind vectors @850hPa (needs the 3-D catalog)

        # keep the first 13 characters of the timestamp string as the title
        ax.set_title(str(dst.base_time.values)[:13], fontsize=9, y=0.97)
        ax.tick_params(labelsize=7)

        for side in spine_sides:
            ax.spines[side].set_color('w')

    plt.tight_layout(h_pad=0.03, w_pad=0.01)

    if centered:
        # the centre panel is the SLP-minimum time step
        for side in spine_sides:
            panels[4].spines[side].set_color('r')

    plt.suptitle('Tropical cyclone: No. {}, {}'.format(track_id, year), y=1.02, fontsize=10)

    return ds_env_subtime

In [None]:
# Root directory used by the cells below; rebinds the name out_dir that the
# save loop earlier in this notebook also assigns.
out_dir = Path('/scratch/wmtsai/featenv_test/TC_TempestExtremes/2014/')

In [None]:
# Open the track file under out_dir (presumably the track_geoinfo.nc written
# by the setup cell — confirm that featenv.track_dir points here).
geotrack = xr.open_dataset(out_dir / 'feature_catalogs/track/track_geoinfo.nc')

In [None]:
# base_time values of the first track
first_track = geotrack.isel(tracks=0)
tmp = first_track.base_time.values
# positions along the first axis where base_time is NaT
is_missing = np.isnat(tmp)
idt_length = np.nonzero(is_missing)[0]

In [None]:
# Print, for every track, how many base_time entries are not NaT.
for n in range(len(geotrack.tracks)):

    tmp = geotrack.isel(tracks=n).base_time.values
    is_valid = ~np.isnat(tmp)
    idt_length = len(np.nonzero(is_valid)[0])
    print(idt_length)