# S2S temperature bias

Create Figure 2. It requires a time series of zonal-mean zonal wind at 60°N and 10 hPa from reanalysis, and polar cap temperature biases at 50 hPa from S2S hindcasts. (For some start dates of the ECMWF S2S hindcasts, multiple model versions exist; I have kept only the newest. Some start dates are initialized from ERA-Interim, some from ERA5.)  

In [None]:
import os
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import scipy.stats as stats
from numba import float64, guvectorize

# Set the default matplotlib font size to 14 for every figure created below.
plt.rcParams.update({'font.size': 14})

In [None]:
def _plot_mc_envelope(ax,dist,color,linestyle):
    '''
        Draw the Monte Carlo envelope stored in dist on ax:
        - a shaded band between the 0.025 and 0.975 percentiles
        - thin lines at the 0.0005 and 0.9995 percentiles
        dist needs a 'percentile' and a 'step' coordinate.
    '''
    ax.fill_between(dist.step.values,
                    dist.sel(percentile=0.025,method='nearest'),
                    dist.sel(percentile=0.975,method='nearest'),
                    color=color,alpha=0.4,edgecolor='none')
    for tail in (0.0005,0.9995):
        dist.sel(percentile=tail,method='nearest').plot.line(ax=ax,color=color,linestyle=linestyle)


def bias_comparision(weak,strong,clim,nrandom=10000,title=None,filename=None):
    '''
        Plot the mean bias of the weak and strong composites against
        the climatological mean bias as a function of lead time.

        Panel a) shows the three mean curves, panel b) the differences
        weak-clim and strong-clim together with envelopes from mc_composite
        (random composites of the same size drawn from clim).

        weak, strong, clim : arrays with a 'realization' and a 'step' dimension
        nrandom            : number of random composites passed to mc_composite
        title              : optional figure title (skipped if None)
        filename           : optional output file; if given the figure is
                             saved there with 300 dpi

        The axis limits are hard-coded. Nothing is returned.
    '''
    # The envelopes need the individual realizations, so compute them
    # before the inputs are collapsed to their means.
    dist_weak = mc_composite(clim,weak,clim,nrandom=nrandom)
    dist_strong = mc_composite(clim,strong,clim,nrandom=nrandom)

    weak = weak.mean('realization')
    strong = strong.mean('realization')
    clim = clim.mean('realization')

    fig, axes = plt.subplots(2,1,sharex=True,figsize=(8,6))

    # Panel a): mean bias of each group
    clim.plot.line(ax=axes[0],linestyle='dashed',color='gray',label='clim',linewidth=3)
    weak.plot.line(ax=axes[0],linestyle='dotted',color='blue',label='weak',linewidth=3)
    strong.plot.line(ax=axes[0],linestyle='solid',color='orange',label='strong',linewidth=3)

    # Panel b): composite minus clim, with the Monte Carlo envelopes
    (weak-clim).plot.line(ax=axes[1],label='weak-clim',color='blue',linestyle='dotted',linewidth=3)
    (strong-clim).plot.line(ax=axes[1],label='strong-clim',color='orange',linestyle='solid',linewidth=3)

    _plot_mc_envelope(axes[1],dist_weak,color='blue',linestyle='dotted')
    _plot_mc_envelope(axes[1],dist_strong,color='orange',linestyle='solid')

    # xarray labels the axes on every plot call; reset what is not wanted.
    axes[0].set_ylim((-2.7,0.2))
    axes[0].set_ylabel(None)
    axes[0].set_xlabel(None)
    axes[0].grid()
    axes[0].set_title(None)
    axes[0].legend(loc='center left', bbox_to_anchor=(1, 0.5))

    # NOTE(review): unlike panel a), the y-label of panel b) is not cleared
    # here - confirm that this is intended.
    axes[1].set_ylim((-0.65,0.45))
    axes[1].grid()
    axes[1].set_title(None)
    axes[1].set_xlabel('lead time [days]')
    axes[1].legend(loc='center left', bbox_to_anchor=(1, 0.5))
    axes[1].set_xlim(0,46)

    # Shift the panel labels by a fixed offset (in points) relative to the
    # upper left corner of each axes.
    trans = mtransforms.ScaledTranslation(-45/72, -20/72, fig.dpi_scale_trans)

    axes[0].text(-0.06,1.0,'a)',transform=axes[0].transAxes+trans,fontsize='large',va='bottom',fontfamily='serif')
    axes[1].text(-0.06,1.0,'b)',transform=axes[1].transAxes+trans,fontsize='large',va='bottom',fontfamily='serif')

    fig.supylabel('polar cap temperature bias [K]',weight='bold')

    if title is not None:
        fig.suptitle(title,weight='bold')

    plt.tight_layout()

    if filename is not None:
        plt.savefig(filename,dpi=300,bbox_inches='tight')

In [None]:
@guvectorize(
    "(float64[:],float64,float64[:],float64[:])",
    "(n), (), (m) -> (m)",
    forceobj=True
)
def random_composite_mean(a,nt,nrandom,out):
    '''
        Means of random composites drawn from a.

        a       : sample to draw from
        nt      : number of elements per composite
        nrandom : dummy array, only its length (the number of composites) is used
        out     : filled with one composite mean per element of nrandom

        Elements are drawn independently, so the same element of a
        can enter a composite more than once.
    '''
    composite_size = int(nt)
    n_composites = len(nrandom)

    # uniform floats in [0,len(a)) truncated to integers are random positions in a
    draws = stats.uniform.rvs(loc=0,scale=len(a),size=composite_size*n_composites)
    positions = draws.astype(int).reshape((composite_size,n_composites))

    # one column per composite; average over the rows
    out[:] = a[positions].mean(axis=0)
    
    
    
@guvectorize(
    "(float64[:],float64[:],float64[:])",
    "(n), (m) -> (m)",
    forceobj=True
)    
def ecdf(a,p,out):
    '''
        Empirical quantiles of array a at the cumulative probabilities p,
        i.e. the empirical cumulative distribution function read inversely.

        a   : sample
        p   : probabilities (fractions between 0 and 1, not percent)
        out : filled with the sorted element of a at position int(p*len(a))

        The position is clipped to the valid index range, so p=1 returns
        the maximum of a and p<0 returns the minimum.
    '''
    ordered = np.sort(a)
    # Without clipping p=1 would index one past the end and a negative p
    # would silently wrap around to the upper tail.
    index = np.clip(np.int64(p*len(a)),0,len(a)-1)
    out[:] = ordered[index]
    
    

def mc_composite(population,c1,c2,dim='realization',nrandom=1000,pvalue=0.05):
    '''
        Compute the distribution of a composite mean difference in a
        Monte Carlo procedure.

        nrandom pairs of random composites with the sizes of c1 and c2
        (along dim) are drawn from population. The differences of their
        means are sorted and returned at 100000 evenly spaced
        probabilities between 0 and 0.99999.

        population : array to draw the random composites from
        c1, c2     : composites; only their length along dim is used
        dim        : dimension that is sampled
        nrandom    : number of random composite pairs
        pvalue     : not used, kept so that existing calls keep working

        Returns an array in which dim is replaced by the dimension
        'percentile' (probabilities as fractions). Comparing an observed
        difference against this distribution is left to the caller.
    '''
    # dummy array, only there to carry the length of the random dimension
    random = xr.DataArray(np.arange(nrandom),dims=('random',))

    # sizes of the observed composites
    n1 = len(c1[dim])
    n2 = len(c2[dim])

    def random_means(size):
        # nrandom means of composites with `size` elements drawn from population
        return xr.apply_ufunc(random_composite_mean,
                              population,size,random,
                              input_core_dims=[[dim],[],['random']],
                              output_core_dims=[['random']],
                              dask='parallelized',
                              output_dtypes=[population.dtype])

    # difference of random composite means (first the c1-sized draw, then c2)
    dist = random_means(n1) - random_means(n2)

    # empirical quantiles of the differences
    p = xr.DataArray(np.linspace(0,0.99999,100000),dims=('percentile'))
    dist = xr.apply_ufunc(ecdf,
                          dist,p,
                          input_core_dims=[['random'],['percentile']],
                          output_core_dims=[['percentile']],
                          dask='parallelized',
                          output_dtypes=[dist.dtype])
    # attach the probabilities as coordinate so that .sel(percentile=...) works
    dist['percentile'] = p

    return dist
                        


In [None]:
# Load the reanalysis zonal wind ('u') at level 10 and drop the 'level' and
# 'latitude' coordinates (presumably scalar here and only in the way when the
# wind is compared with the hindcasts below - verify against the file).
reanalysis = xr.open_dataset('./era5_zonal_mean_wind.nc')
reanalysis = reanalysis['u'].sel(level=10).load().drop(('level','latitude'))

# Combine the two sets of hindcast biases (initialized from ERA5 and from
# ERA-Interim) along the start date dimension 'time'.
level = 50
system = 'ECMWF'
da1 = xr.open_dataarray('./T'+str(level)+'_bias_'+system+'-ERA5.nc')
da2 = xr.open_dataarray('./T'+str(level)+'_bias_'+system+'-ERA-Int-6hourly.nc')
hindcasts = xr.concat([da1,da2],dim='time')
# drop start dates that contain missing values
hindcasts = hindcasts.dropna('time')
# express the lead time as the day component of 'step'
hindcasts['step'] = hindcasts['step.days']

# Make composites: keep the start dates at which the reanalysis wind is
# below 5 (weak) or above 40 (strong); units presumably m/s - TODO confirm.
weak = hindcasts.where(reanalysis < 5,drop=True)
strong = hindcasts.where(reanalysis > 40,drop=True)

print('weak startdates: '+str(len(weak['time'])))
print('strong startdates: '+str(len(strong['time'])))
print('clim startdates: '+str(len(hindcasts['time'])))


# Plot the comparison. 'time' and 'number' (presumably the ensemble member)
# are pooled into one 'realization' dimension; for weak and strong the
# stacked 'realization' coordinate is dropped again, for clim it is kept.
weak = weak.stack(realization=('time','number')).drop('realization')
strong = strong.stack(realization=('time','number')).drop('realization')
clim = hindcasts.stack(realization=('time','number'))

bias_comparision(weak,strong,clim,nrandom=20000,title=None,filename=None)