In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import cmocean
import numba
import os

from dask.distributed import Client
from scipy import stats

# compute_spectra is the helper name that is called further down in this notebook
from wave_util import construct_rolling_dataset, remove_climatology, compute_spectra

# Base directory for the input data, read from the WORK environment variable.
# Fail with an explicit message instead of the TypeError raised by None + '/'.
_work_root = os.environ.get('WORK')
if _work_root is None:
    raise EnvironmentError("Environment variable 'WORK' is not set")
work = _work_root + '/'

# Default font size for all figures of this notebook
plt.rcParams.update({'font.size': 14})

In [None]:
# Create a dask.distributed client; no arguments are passed, so the library defaults apply.
client = Client()

# Bare expression at the end of the cell so that the notebook displays the client.
client

In [None]:
def centroid(da,dim):
    '''
        Weighted mean of the coordinate dim, using da as weights.

        Both the weighted sum and the normalisation are taken over the
        'wavenumber' and 'phase_speed' dimensions.
    '''
    spectral_dims = ('wavenumber','phase_speed')
    weighted_total = (da*da[dim]).sum(spectral_dims)
    total = da.sum(spectral_dims)
    return weighted_total / total

In [None]:
# Construct a nested list of file names: consecutive groups of 12 files each
# (presumably one group per year of monthly files -- confirm against the archive layout)

directory = work+'DATA/ERA5/eth/plev/'

files = sorted(directory + name
               for name in os.listdir(directory)
               if name.startswith('era5_an_vwind_reg2_6h'))

# Only complete groups of 12 are kept; left-over files at the end are dropped
n_valid_years = len(files)//12

files = [files[start:start+12] for start in range(0,12*n_valid_years,12)]

In [None]:
# Subset handed to construct_rolling_dataset: latitude slice from -20 to -80 and plev = 25000
selection = {'lat': slice(-20,-80), 'plev': 25000}

rolling = construct_rolling_dataset(files,
                                    selection=selection,
                                    n_per_window=14*4,
                                    season_days=365)

In [None]:
# Keep only the windows whose 'rolling' coordinate lies in the years 1979-2022
in_period = rolling['rolling.year'].isin(range(1979,2023))
reduced = rolling.where(in_period,drop=True)


def _every_second_window(da):
    '''
        Select positions 0, 2, ..., 50 along 'rolling' within one year
    '''
    return da.isel(rolling=range(0,51,2))


reduced = reduced.groupby('rolling.year').apply(_every_second_window)
anomalies = remove_climatology(reduced)

In [None]:
# Rename the coordinates before passing the 'var132' anomalies to compute_spectra
var132_anomalies = anomalies['var132'].rename({'lat': 'latitude', 'lon': 'longitude'})
spectra = compute_spectra(var132_anomalies,wavenumber=slice(1,10),dc=1/3)

In [None]:
# Evaluate the spectra now and keep the computed result under the same name,
# so every later cell works on the evaluated object.
spectra = spectra.compute()

## Figure 6

In [None]:
# Average the spectra over the latitude slice -35 to -65.
# The guard keeps the cell re-runnable: after the first run the 'latitude'
# dimension no longer exists and selecting on it again would fail.
if 'latitude' in spectra.dims:
    spectra = spectra.sel(latitude=slice(-35,-65)).mean('latitude')

# Split into the two periods that are compared: 1979-2000 and 2001-2022
spectra_early = spectra.where(spectra['rolling.year'].isin(range(1979,2001)),drop=True)
spectra_late = spectra.where(spectra['rolling.year'].isin(range(2001,2023)),drop=True)

In [None]:
@numba.guvectorize(
    "(float64[:],float64[:],float64[:,:])",
    "(n), (m) -> (m,n)",
    forceobj=True
)
def random_sample(a,nb,out):
    '''
        Draw len(nb) random resamples of array a, each of length len(a)
        (sampling with replacement)

        - nb is a dummy array, only its length is used
        - out receives one resample per row, shape (len(nb), len(a))
    '''
    n_values = len(a)
    n_resamples = len(nb)
    # Uniform variates on [0, n_values), truncated to integers, serve as random indices
    draws = stats.uniform.rvs(loc=0,scale=n_values,size=n_values*n_resamples)
    indices = draws.astype(int).reshape(n_resamples,n_values)
    out[:,:] = a[indices]
    

@numba.guvectorize(
    "(float64[:],float64[:],float64[:])",
    "(n), (m) -> (m)",
    forceobj=True
)    
def ecdf(a,p,out):
    '''
        Empirical quantiles of array a at the probability levels p

        The values of a are sorted and, for every level in p, the element at
        position int(p*len(a)) of the sorted array is written to out.
        Positions are clamped to the valid index range, so that p = 1 yields
        the largest value instead of raising an IndexError and negative
        levels yield the smallest value instead of wrapping around.

        - a   : 1-d array of sample values
        - p   : 1-d array of probability levels, expected within [0,1]
        - out : 1-d output array with the same length as p
    '''
    ordered = np.sort(a)
    # Truncate towards zero as before, then clamp to the first/last valid position
    position = np.clip((p*len(a)).astype(np.int64),0,len(a)-1)
    out[:] = ordered[position]
    
    
def t_statistic(x1,x2,dim):
    '''
        T-statistic for the difference of the means of two samples along dim

        The standard error is sqrt(var(x1)/n1 + var(x2)/n2), where n1 and n2
        are the lengths of x1 and x2 along dim. For two samples of equal
        length n this is the same as sqrt((var(x1) + var(x2))/n).
        The variances are computed with the defaults of the var method.

        - x1, x2 : samples that provide mean(dim), var(dim) and indexing by dim
        - dim    : name of the dimension that holds the sample members

        Returns the difference of the means divided by its standard error
    '''
    diff = x1.mean(dim) - x2.mean(dim)
    # Use each sample's own length so that samples of unequal length are handled
    n1 = len(x1[dim])
    n2 = len(x2[dim])
    err = np.sqrt(x1.var(dim)/n1 + x2.var(dim)/n2)
    return diff / err


def parametric_bootstrap(sample1,sample2,nb=1000,confid=0.05):
    '''
        Bootstrap test for a difference of the means of two samples along 'rolling'

        Each sample is centred on its own mean, so that the null hypothesis of
        equal means holds, and is resampled with replacement nb times. The
        t-statistic of every resampled pair gives the null distribution. The
        t-statistic of the original samples is then compared against the
        confid/2 and 1-confid/2 quantiles of that distribution.

        - sample1, sample2 : arrays with a 'rolling' dimension
        - nb               : number of bootstrap resamples
        - confid           : two-sided significance level

        Returns a boolean array that is True where the null hypothesis is rejected
    '''
    # Dummy array: random_sample only uses its length (the number of resamples)
    bootstrap = xr.DataArray(np.arange(nb),dims='random')

    def _resample_control(sample):
        # Produce a control sample that fulfils the null hypothesis and resample it
        control = sample - sample.mean('rolling')
        return xr.apply_ufunc(random_sample,
                              control,bootstrap,
                              input_core_dims=[['rolling'],['random']],
                              output_core_dims=[['random','rolling']],
                              dask='parallelized',
                              # random_sample is declared with a float64 output
                              output_dtypes=[np.float64])

    c1 = _resample_control(sample1)
    c2 = _resample_control(sample2)

    # t statistic of every resampled pair
    dist = t_statistic(c1,c2,'rolling')

    # quantiles of the bootstrap distribution on a fixed grid of 1000 levels
    p = xr.DataArray(np.linspace(0,0.999,1000),dims='percentile')
    dist = xr.apply_ufunc(ecdf,
                          dist,p,
                          input_core_dims=[['random'],['percentile']],
                          output_core_dims=[['percentile']],
                          dask='parallelized',
                          # ecdf is declared with a float64 output
                          output_dtypes=[np.float64])
    dist['percentile'] = p

    # check whether the null hypothesis can be rejected (two-sided)
    t = t_statistic(sample1,sample2,'rolling')
    lower = dist.sel(percentile=confid/2,method='nearest')
    upper = dist.sel(percentile=1-confid/2,method='nearest')

    return (t < lower) | (t > upper)

In [None]:
# Test the difference between the early and late spectra with 5000 resamples
# at a two-sided level of 0.05; sig is True where the null hypothesis is rejected.
sig = parametric_bootstrap(spectra_early,spectra_late,nb=5000,confid=0.05)

In [None]:
fig, axes = plt.subplots(nrows=2,ncols=1,figsize=(6,8))


def _format_spectrum_axes(ax,title):
    '''
        Apply the limits, ticks, grid, axis labels and title shared by both panels
    '''
    ax.set_xlim(-20,20)
    ax.set_xticks([-20,-10,0,10,20])
    ax.set_xticks([-15,-5,5,15],minor=True)
    ax.set_ylim(1,10)
    ax.set_yticks([2,4,6,8,10])
    ax.set_yticks([1,3,5,7,9],minor=True)
    ax.grid(axis='both')

    ax.set_ylabel('Zonal wavenumber')
    ax.set_xlabel(r'Phase speed [m s$^{-1}$]')
    ax.set_title(title,weight='bold',fontsize=14)


# marker settings shared by all centroid symbols
marker_style = dict(markeredgecolor='k',markersize=10,markeredgewidth=2)


## mean spectrum

mean_spectrum = spectra.mean('rolling')

C1 = mean_spectrum.plot.pcolormesh(ax=axes[0],
                                   levels=np.linspace(0,4.5,19),
                                   extend='max',
                                   cmap=cmocean.cm.matter,
                                   add_colorbar=False)

# centroid of the mean spectrum, drawn as an open circle
l = axes[0].plot(centroid(mean_spectrum,'phase_speed').values,
                 centroid(mean_spectrum,'wavenumber').values,
                 marker='o',**marker_style)
l[0].set_markerfacecolor((0,0,0,0))

_format_spectrum_axes(axes[0],'Reference')

cbar = plt.colorbar(C1,ax=axes[0],ticks=[0,1,2,3,4,5])
cbar.set_label(r'Power spectral density [m s$^{-1}$]',fontsize=12)


## spectral difference

# contour levels from -5 to 5 in steps of 0.1, without the zero level
levels = np.concatenate((np.arange(-5,0,0.1),np.arange(0.1,5.1,0.1)))

early_mean = spectra_early.mean('rolling')
late_mean = spectra_late.mean('rolling')

C = (late_mean-early_mean).plot.contour(ax=axes[1],levels=levels,cmap=cmocean.cm.rain,
                                        linestyles=np.where(levels>0,'solid','dotted'))
axes[1].clabel(C)

# NOTE(review): the '..' hatch is assigned to the first band (0 to 0.5), i.e. where
# sig is False -- confirm that the non-significant areas are meant to be stippled.
sig.astype(np.double).plot.contourf(ax=axes[1],levels=[0,0.5,1],hatches=['..',''],alpha=0,add_colorbar=False)

# centroids of the early ('+') and late ('x') mean spectra
l = axes[1].plot(centroid(early_mean,'phase_speed').values,
                 centroid(early_mean,'wavenumber').values,
                 marker='+',**marker_style)
l[0].set_markerfacecolor((0,0,0,0))

axes[1].plot(centroid(late_mean,'phase_speed').values,
             centroid(late_mean,'wavenumber').values,
             marker='x',**marker_style)

_format_spectrum_axes(axes[1],r'2001-2022 $-$ 1979-2000')


## configure

fig.subplots_adjust(left=0,bottom=0,right=1,top=1,wspace=0.3,hspace=0.3)

# give the lower panel the same width as the upper one, which shares its space with the colorbar
box = list(axes[1].get_position().bounds)
box[2] = axes[0].get_position().bounds[2]
axes[1].set_position(box)

# panel labels, shifted by a fixed offset given in inches
trans = mtransforms.ScaledTranslation(-45/72, -20/72, fig.dpi_scale_trans)
label_style = dict(fontsize='large',va='bottom')

axes[0].text(-0.06,1.06,'a)',transform=axes[0].transAxes+trans,**label_style)
axes[1].text(-0.06,1.06,'b)',transform=axes[1].transAxes+trans,**label_style)
