In [1]:
import os
import sys
import xarray as xr
import numpy as np
import matplotlib
import pandas as pd
from matplotlib import pyplot as plt
from datetime import datetime
from pathlib import Path
import cartopy.crs as ccrs
import warnings

In [2]:
warnings.filterwarnings('ignore')

In [3]:
TC_dir = Path('/neelin2020/TempestExtremes/TC')

In [12]:
%%time
TC_file = open(TC_dir / 'ERA5_TC_tracks_20200824.txt', mode='r')
var_list = ['x grid point of the centre','y grid point of the centre','longitude','latitude',
           'slp','Pressure drop','Maximum Surface Wind','year','month','day','hour']
df_list = []
with open(TC_dir / 'ERA5_TC_tracks_20200824.txt', 'r') as f:
    
    lines = f.readlines()
    
    for n,line in enumerate(lines):

        line_current = line.split()
        if n <= (len(lines)-2): # (n-1) n means total line number
            line_next = lines[n+1].split()

        if line_current[0] == 'start': # if a new track starts, showing a header info only
            df_track = pd.DataFrame() # reset dataframe for a new track
            
        else:  # expanding the info of the current track
            tmp = pd.DataFrame(data=np.asarray(line_current).reshape(1,11), columns=var_list)
            df_track = pd.concat([df_track, tmp])

        # end of current track
        if line_next[0] == 'start': 
            
            # save df_track into the list as a xarray.dataset
            timestamp_list = []
            lon_list = []
            lat_list = []
            sp_list = []
            for t in range(len(df_track['year'])):
                year = int(df_track.iloc[t].year)
                month = int(df_track.iloc[t].month)
                day = int(df_track.iloc[t].day)
                hour = int(df_track.iloc[t].hour)
                timestamp = datetime(year, month, day, hour)
                timestamp_list.append(timestamp)
                
                lon_list.append(float(df_track.iloc[t].longitude))
                lat_list.append(float(df_track.iloc[t].latitude))
                sp_list.append(float(df_track.iloc[t].slp))
            
            # create xarray.dataset
            ds = xr.Dataset(data_vars = dict(
                            meanlon = (['time'], lon_list),
                            meanlat = (['time'], lat_list),
                            slp = (['time'], sp_list),
                            base_time = (['time'], timestamp_list)),
                           
                            coords= dict(time = (['time'], range(len(timestamp_list)))),
                            attrs = dict(description='TempestExtreme TC'))
            
            df_list.append(ds)

        # for the last track
        if n == (len(lines)-1): # the last line
            
            # save df_track into the list as a xarray.dataset
            timestamp_list = []
            lon_list = []
            lat_list = []
            sp_list = []
            for t in range(len(df_track['year'])):
                year = int(df_track.iloc[t].year)
                month = int(df_track.iloc[t].month)
                day = int(df_track.iloc[t].day)
                hour = int(df_track.iloc[t].hour)
                timestamp = datetime(year, month, day, hour)
                timestamp_list.append(timestamp)
                
                lon_list.append(float(df_track.iloc[t].longitude))
                lat_list.append(float(df_track.iloc[t].latitude))
                sp_list.append(float(df_track.iloc[t].slp))                
            
            # create xarray.dataset
            ds = xr.Dataset(data_vars = dict(
                            meanlon = (['time'], lon_list),
                            meanlat = (['time'], lat_list),
                            slp = (['time'], sp_list),
                            base_time = (['time'], timestamp_list)),
                           
                            coords= dict(time = (['time'], range(len(timestamp_list)))),
                            attrs = dict(description='TempestExtreme TC tracks',
                                         frequency='6 hourly',
                                         source='ERA5, 0.25-deg.',
                                         ))
            
            df_list.append(ds)

CPU times: user 39.3 s, sys: 16.7 ms, total: 39.3 s
Wall time: 39.4 s


In [13]:
# merge all tracks into a single xarray.dataset file 
ds_TC = xr.concat(df_list, dim=pd.Index(np.arange(len(df_list)), name='tracks'))

In [14]:
# extract 2000-2020 to match existing EAR5 data
idx_year = np.asarray([str(i)[:4] for i in ds_TC.base_time.isel(time=0).values], dtype='int')
idx_select = np.where(np.logical_and(idx_year >= 2001, idx_year <= 2020))[0]
ds_TC_20yr = ds_TC.isel(tracks=idx_select)

In [15]:
ds_TC_20yr.to_netcdf('/neelin2020/TempestExtremes/TC/ERA5_TC_tracks_2001_2020.nc'
                     , encoding={'meanlon': {'dtype': 'float32'}, 'meanlat': {'dtype': 'float32'}})

In [16]:
# 2014 TCs
idx_select = np.where(idx_year == 2014)[0]
ds_TC_2014 = ds_TC.isel(tracks=idx_select)

In [18]:
ds_TC_2014.to_netcdf('/neelin2020/TempestExtremes/TC/ERA5_TC_tracks_2014.nc'
                     , encoding={'meanlon': {'dtype': 'float32'}, 'meanlat': {'dtype': 'float32'}})