# Validating the environmental pressure assumption

TCRM samples the long-term daily mean sea level pressure at the location of the synthetic TC to represent the far-field pressure value ($P_{env}$). However, looking at the central pressure values, the synthetic TCs tend to have too high a central pressure. Here, we compare the long-term daily mean sea level pressure values against the observed pressures at the outermost closed isobar ($P_{oci}$), and the dependence (or otherwise) on central pressure ($P_c$).

In [1]:
%matplotlib inline

import os
from os.path import join as pjoin
from matplotlib import pyplot as plt
from datetime import datetime, timedelta

from Utilities.metutils import convert
from Utilities.interp3d import interp3d
from Utilities.nctools import ncLoadFile, ncGetData

import numpy as np
import scipy.stats as stats

import pandas as pd
import statsmodels.formula.api as smf

from IPython.html.widgets import interact, fixed
from IPython.html import widgets

import seaborn as sns
# Plot styling applied to every figure below: the "ticks" axes style and the
# "poster" scaling context.
sns.set_style("ticks")
sns.set_context("poster")

Define a function to convert the formatted latitude/longitude values to actual numbers.

In [2]:
def convertLatLon(strval):
    """
    Convert a formatted latitude/longitude string to a float value.

    The input is a number in tenths of a degree followed by a single
    hemisphere letter (case-insensitive), e.g. '140S' gives -14.0.
    Southern and western values are negated; longitudes (E or W) are then
    wrapped into the range [0, 360), so '1450W' gives 215.0.

    :param str strval: string containing the latitude or longitude.

    :returns: Latitude/longitude as a float value.

    """
    magnitude, hemisphere = strval[:-1], strval[-1].upper()
    value = float(magnitude) / 10.
    if hemisphere in ('S', 'W'):
        value = -value
    if hemisphere in ('E', 'W'):
        # Longitudes are expressed as degrees east of the prime meridian.
        value %= 360
    return value

In [3]:
# Field names of the track records, in file order (35 fields). COLTYPES and
# COLUNITS below are parallel lists: entry i describes field i of COLNAMES.
COLNAMES = ['BASIN','Number', 'Datetime','TECHNUM', 'TECH','TAU', 'Latitude', 'Longitude', 'Windspeed','Pressure',
            'Status', 'RAD', 'WINDCODE','RAD1', 'RAD2','RAD3', 'RAD4','Poci', 'Roci','rMax', 'GUSTS','EYE',
            'SUBREGION','MAXSEAS', 'INITIALS','DIR', 'SPEED','STORMNAME', 'DEPTH','SEAS',
            'SEASCODE','SEAS1', 'SEAS2','SEAS3', 'SEAS4'] 

# numpy format of each field; the 'Datetime' field holds Python datetime
# objects produced by converter 2 below.
COLTYPES = ['|S2', 'i', datetime, 'i', '|S4', 'i', 'f', 'f', 'f', 'f', 
            '|S4', 'f', '|S3', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f',
            '|S1', 'f', '|S3', 'f', 'f', '|S10', '|S1', 'f', 
            '|S3', 'f', 'f', 'f', 'f']
# Units of each field as stored in the file ('' where no unit applies). The
# converters below read these to convert the 'nm' radius fields to km.
COLUNITS = ['', '', '', '', '', '', '', '', 'kts', 'hPa', 
            '', 'nm', '', 'nm', 'nm', 'nm', 'nm', 'hPa', 'nm', 'nm', 'kts', 'nm',
            '', '', '', 'degrees', 'kts', '', '', '',
            '', '', '', '', '']
# strptime format of the 'Datetime' field: year, month, day, hour.
DATEFORMAT = "%Y%m%d%H"
# Structured dtype pairing each field name with its format.
dtype = np.dtype({'names':COLNAMES, 'formats':COLTYPES})
# Per-column converters for numpy.genfromtxt, keyed by column index. Every
# converter strips the trailing comma (and, except for column 17, spaces)
# that the fixed-width split leaves in the raw field.
converters = {
    1: lambda s: s.strip(' ,'),
    # Datetime: parsed with DATEFORMAT.
    2: lambda s: datetime.strptime(s.strip(' ,'), DATEFORMAT),
    # Latitude / Longitude: decoded by convertLatLon.
    6: lambda s: float(convertLatLon(s.strip(' ,'))),
    7: lambda s: float(convertLatLon(s.strip(' ,'))),
    # Windspeed, Pressure, Status: only stripped here.
    # NOTE(review): conversion to the field type is presumably left to
    # genfromtxt -- confirm.
    8: lambda s: s.strip(' ,'),
    9: lambda s: s.strip(' ,'),
    10: lambda s: s.strip(' ,'),
    # Radius fields (RAD, RAD1-RAD4, Roci, rMax): an empty field is treated as
    # 0, then the value is converted from the unit in COLUNITS to km.
    11: lambda s: convert(float(s.strip(' ,') or 0), COLUNITS[11], 'km'),
    12: lambda s: s.strip(' ,'),
    13: lambda s: convert(float(s.strip(' ,') or 0), COLUNITS[13], 'km'),
    14: lambda s: convert(float(s.strip(' ,') or 0), COLUNITS[14], 'km'),
    15: lambda s: convert(float(s.strip(' ,') or 0), COLUNITS[15], 'km'),
    16: lambda s: convert(float(s.strip(' ,') or 0), COLUNITS[16], 'km'),
    # Poci: unlike the radius converters there is no `or 0` fallback, so an
    # empty field makes float() raise ValueError inside this converter.
    17: lambda s: float(s.strip(',')),
    18: lambda s: convert(float(s.strip(' ,') or 0), COLUNITS[18], 'km'),
    19: lambda s: convert(float(s.strip(' ,') or 0), COLUNITS[19], 'km'),
}
# Integer delimiters are field widths in characters; only the first 23 of the
# 35 named fields have a width here.
delimiter = (3,4,12,4,6,5,7,7,5,6,4,5,5,6,6,6,6,6,6,5,5,5,5)
skip_header = 0
# Read the first 23 columns, matching the 23 widths in `delimiter`.
usecols = tuple(range(23))
# Empty fields count as missing and are filled with 0.
missing_value = ""
filling_values = 0

def loadData(filename):
    """
    Read a track file into a structured array with `numpy.genfromtxt`.

    Three column layouts are attempted in turn: all columns named in the
    module-level `usecols`, then the first 18 columns, then the first 9
    columns with a shortened list of field widths. A layout is abandoned
    only when `numpy.genfromtxt` raises `IndexError`; an `IndexError` from
    the final layout propagates to the caller.

    :param str filename: path of the file to read.

    :returns: the array returned by `numpy.genfromtxt`.

    """
    # Options common to every attempt.
    shared = dict(skip_header=skip_header, converters=converters,
                  missing_values=missing_value,
                  filling_values=filling_values, autostrip=True,
                  invalid_raise=False)

    fallbacks = ((delimiter, usecols),
                 (delimiter, tuple(range(18))))
    for widths, columns in fallbacks:
        try:
            return np.genfromtxt(filename, dtype, delimiter=widths,
                                 usecols=columns, **shared)
        except IndexError:
            continue

    # Last resort: only the leading nine fields.
    return np.genfromtxt(filename, dtype, delimiter=[3,4,12,4,6,5,7,7,5],
                         usecols=tuple(range(9)), **shared)


Often the b-deck files contain multiple records with the same time stamp. These duplicates record information on different wind speed radii (e.g. the radius to 34-knot winds, 48-knot winds, etc.). We can quickly filter out this extra information using [`numpy.unique()`](http://docs.scipy.org/doc/numpy/reference/generated/numpy.unique.html), which lets us keep a single record per time stamp. Additional filtering restricts the records to a known domain (longitudes from 90 to 180 degrees), to storms that are of Tropical Storm or Typhoon strength, and to records with an `rMax` value of at least 0.1.

In [7]:
def filterData(data):
    """
    Keep one record per time stamp, then keep only the records of interest.

    The first record for each distinct 'Datetime' value is retained. Of
    those, a record is kept when its 'Status' is 'TS' or 'TY', its
    'Longitude' lies in [90, 180] and its 'rMax' is at least 0.1.

    :param data: structured array with 'Datetime', 'Status', 'Longitude'
                 and 'rMax' fields.

    :returns: 1-d structured array of the retained records, ordered by
              'Datetime'.

    """
    # A 0-d record array cannot be indexed with an index array, so promote it.
    data = np.atleast_1d(data)
    _, first = np.unique(data['Datetime'], return_index=True)

    # The masks below are built on the de-duplicated records, so they must be
    # applied to that same subset. Applying their positions to `data` itself
    # would select the wrong rows whenever duplicates are present.
    unique = data[first]
    status = unique['Status']
    longitude = unique['Longitude']

    isStorm = (status == 'TS') | (status == 'TY')
    inDomain = (longitude >= 90.) & (longitude <= 180.)
    hasRmax = unique['rMax'] >= 0.1
    return unique[isStorm & inDomain & hasRmax]

def julianDays(datetime):
    """
    Convert datetimes to fractional day-of-year values.

    Each value is the day of the year (1 for 1 January) plus the hour
    divided by 24; minutes and seconds do not contribute.

    :param datetime: iterable of datetime objects.

    :returns: `numpy.ndarray` of float day-of-year values.

    """
    dayOfYear = [when.timetuple().tm_yday + when.hour / 24.
                 for when in datetime]
    return np.array(dayOfYear)

In [8]:
def processFiles(path, basin):
    """
    Gather track data from every file in the directories for one basin.

    The tree below `path` is walked and each file in a directory whose
    path ends with `basin` is read with `loadData`. Files without a
    'Status' field are skipped; the rest are passed through `filterData`
    and, when a 'Poci' field is present, their values are appended to the
    results.

    :param str path: root directory to search.
    :param str basin: suffix of the directories to process (e.g. 'sh').

    :returns: tuple of 1-d arrays `(poci, prs, lon, lat, day)` holding the
              'Poci', 'Pressure', 'Longitude' and 'Latitude' fields and the
              fractional day of year of every retained record.

    """
    poci, prs, lon, lat, day = (np.array([]) for _ in range(5))

    for root, _dirs, files in os.walk(path):
        if not root.endswith(basin):
            continue
        for fname in files:
            data = loadData(pjoin(root, fname))
            if 'Status' not in data.dtype.names:
                continue
            data = filterData(data)
            if 'Poci' not in data.dtype.names:
                continue
            poci = np.append(poci, data['Poci'])
            prs = np.append(prs, data['Pressure'])
            lat = np.append(lat, data['Latitude'])
            lon = np.append(lon, data['Longitude'])
            day = np.append(day, julianDays(data['Datetime']))

    return poci, prs, lon, lat, day

In [9]:
# Root directory of the best-track files (machine-specific path).
inputPath = "C:\\WorkSpace\\data\\Raw\\best_tracks"
# Collect Poci, central pressure, longitude, latitude and fractional day of
# year from every file in directories whose path ends with 'sh'.
spoci, sprs, slon, slat, sdays = processFiles(inputPath, 'sh')

In [10]:
# Sample coordinates stacked as rows: day of year, latitude, longitude.
scoords = np.array([sdays, slat, slon])
# Long-term daily mean sea level pressure file (machine-specific path).
ncfile = "C:\\WorkSpace\\tcrm\\MSLP\\slp.day.ltm.nc"
ncobj = ncLoadFile(ncfile)
# Units of the 'slp' variable, needed for the conversion to hPa below.
slpunits = getattr(ncobj.variables['slp'], 'units')
slpdata = ncGetData(ncobj, 'slp')
# Interpolate the pressure field to each sample's (day, lat, lon).
# NOTE(review): scale/offset presumably map day/latitude/longitude onto the
# index space of the grid -- confirm against Utilities.interp3d.
spenv = interp3d(slpdata, scoords, scale=[365., 180., 360.], offset=[0., -90., 0.])
spenv = convert(spenv, slpunits, 'hPa')


In [11]:
# Joint distribution of the interpolated environmental pressure against the
# observed Poci. Records with Poci == 0 are excluded (presumably missing
# values, given the zero fill value used when loading -- confirm).
sjp = sns.jointplot(spenv.compress(spoci!=0), spoci.compress(spoci!=0), kind='hex')

sjp.set_axis_labels(r'$P_{env}$', r'$P_{oci}$')


So the long-term daily mean sea level pressure isn't the best predictor of $P_{oci}$ - in either the Southern Hemisphere or the North West Pacific basin. There are cases in the NW Pacific where the difference is > 30 hPa! That's going to generate some very strange results.

Can we improve this? Let's start by looking at any possible relationship to the central pressure.

In [12]:
# Assemble the analysis table from the records that have a non-zero Poci.
sdf = pd.DataFrame({
    column: values.compress(spoci != 0)
    for column, values in (('CentralPressure', sprs),
                           ('EnvPressure', spenv),
                           ('Poci', spoci),
                           ('Longitude', slon),
                           ('Latitude', slat))
})

# Difference between the environmental pressure and the observed Poci.
sdf['Pdiff'] = sdf['EnvPressure'].sub(sdf['Poci'])

In [13]:
# Poci as a function of environmental pressure and central pressure together.
sns.interactplot('EnvPressure', 'CentralPressure', 'Poci', sdf)

In [14]:
# Pairwise relationships between all columns of sdf, each with a regression fit.
sns.pairplot(sdf, kind='reg')

In [15]:
# Distribution of the difference between environmental pressure and Poci.
ax = sns.distplot(sdf['Pdiff'])
ax.set_xlabel(r'$P_{env} - P_{oci}$ (hPa)')
ax.set_ylabel('Probability')
sns.despine()

In [16]:
from patsy import dmatrices
import statsmodels.api as sm
# Ordinary least squares fit of Pdiff against environmental pressure, central
# pressure and absolute latitude. The formula yields an 'Intercept' column
# plus one column per term.
y, X = dmatrices('Pdiff ~ EnvPressure + CentralPressure + np.abs(Latitude)', data=sdf, return_type='dataframe')
mod = sm.OLS(y, X)
res = mod.fit()
print(res.summary())

In [17]:
# Display the fitted coefficients, indexed by term name.
res.params

In [20]:
def modelPoci(a, b, c, d, lat, pc, penv):
    """
    Evaluate the modelled outermost-closed-isobar pressure on a grid.

    The value at each grid point is
    ``a*penv - b*pc - c*abs(lat) - d``.

    :param float a: coefficient applied to the environmental pressure.
    :param float b: coefficient applied to the central pressure.
    :param float c: coefficient applied to the absolute latitude.
    :param float d: constant term.
    :param float lat: latitude; only its absolute value is used.
    :param pc: 1-d array of central pressure values.
    :param penv: 1-d array of environmental pressure values.

    :returns: 2-d array of shape ``(len(penv), len(pc))``; row ``j``
              corresponds to ``penv[j]`` and column ``i`` to ``pc[i]``.

    """
    ppc, ppenv = np.meshgrid(pc, penv)
    return a*ppenv - b*ppc - c*np.abs(lat) - d


def plotter(a, b, c, d, lat):
    """
    Plot the modelled Poci against central and environmental pressure.

    Draws filled contours with labelled contour lines on the current
    matplotlib axes, with central pressure (800-1000 hPa) on the x axis and
    environmental pressure (1000-1020 hPa) on the y axis.

    :param float a: coefficient applied to the environmental pressure.
    :param float b: coefficient applied to the central pressure.
    :param float c: coefficient applied to the absolute latitude.
    :param float d: constant term.
    :param float lat: latitude at which the model is evaluated.

    """
    pc = np.linspace(800, 1000, 200)
    penv = np.linspace(1000, 1020, 200)
    levels = np.arange(990, 1021, 1)

    # Rows of y follow penv and columns follow pc, which is the layout
    # contourf/contour expect for Z when x=pc and y=penv. The grid must not be
    # flipped, otherwise the environmental pressure axis is mirrored.
    y = modelPoci(a, b, c, d, lat, pc, penv)

    cb = plt.contourf(pc, penv, y, levels=levels, extend='both', 
                     cmap=sns.light_palette((210, 90, 50), input="husl", as_cmap=True))
    cs = plt.contour(pc, penv, y, levels=levels, colors='0.25')
    plt.clabel(cs, inline=1, fmt='%.1f hPa')
    plt.colorbar(cb, label=r"Modelled $P_{oci}$ (hPa)", extend='both', 
                 shrink=1.0, aspect=30)
    plt.xlabel("Central pressure (hPa)")
    plt.ylabel("Long-term daily mean sea level pressure (hPa)")

In [21]:
# Rearrange the fitted model into an expression for Poci. The regression is
#   Pdiff = Penv - Poci = d + k*Penv + b*Pc + c*|lat|
# so
#   Poci = (1 - k)*Penv - b*Pc - c*|lat| - d
# which is the form plotter evaluates, with a = 1 - k.
d = res.params['Intercept']
c = res.params['np.abs(Latitude)']
b = res.params['CentralPressure']
a = 1 - res.params['EnvPressure']


# Coefficients are held fixed; latitude is the only interactive control
# (range -30 to -5 in steps of 1).
interact(plotter, a=fixed(a), b=fixed(b), c=fixed(c), d=fixed(d), lat=(-30, -5, 1))