# Hot day persistence

## also called duration histogram

Resample years for significance testing. This should take the seasonal cycle into account. -> Does the order of the years matter?

In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import pandas
import scipy.signal
import cmocean
import numba
import os

# Root of the data tree: the WORK environment variable plus a trailing slash.
# os.environ.get returns None when WORK is unset, in which case the concatenation fails.
work = os.environ.get('WORK')+'/'

# Default font size for all figures
plt.rcParams['font.size'] = 14

## Mask hot days

In [None]:
# Temperature files (name prefix era5_an_t2m_reg05_1h), sorted by name
directory = work+'/wolfgang/ERA5_surf_day_max/'
files = sorted(directory+f for f in os.listdir(directory) if f.startswith('era5_an_t2m_reg05_1h'))

# Threshold field: the p=0.9 slice of the precomputed percentile file
dist = xr.open_dataarray(work+'/wolfgang/ERA5_surf_day_max/percentiles_31days.nc').sel(p=0.9)


In [None]:
# File by file, flag every value that stays below the threshold of its day of year
failing = []

for f in files:
    data = xr.open_dataset(f)['var167']
    threshold = dist.sel(dayofyear=data['time.dayofyear'])
    failing.append(data < threshold)

# One boolean array along time, with lat/lon stacked into a single 'ncells' dimension
failing = xr.concat(failing,dim='time').stack(ncells=('lat','lon'))

failing

## Count duration year-by-year

In [None]:
@numba.jit(nopython=True)
def count_duration(array,index):
    '''
        Count runs of consecutive hot days by their length

        array : boolean sequence; a True entry ends the current run, a False
                entry extends it by one day
        index : positions of array to visit, in the order given

        Returns an integer array of length max_duration+1
        - element k (1 <= k <= max_duration) counts the runs of exactly k days
        - first element counts the runs that are longer than max_duration

        A run that is still open after the last visited position is counted as
        well, consistent with a run that is already under way at the first position.
    '''
    max_duration = 14
    occurrence = np.zeros(max_duration+1,np.int_)
    count = 0

    for i in index:
        if array[i]:
            # the run just ended: file it under its length or in the overflow bin
            if count > 0:
                if count > max_duration:
                    occurrence[0] += 1
                else:
                    occurrence[count] += 1

            count = 0

        else:
            count += 1

    # flush the run that reaches the end of the visited positions
    if count > 0:
        if count > max_duration:
            occurrence[0] += 1
        else:
            occurrence[count] += 1

    return occurrence

In [None]:
def loop_cells(array):
    '''
        Run count_duration on every cell along 'ncells' and collect the result

        Returns a Dataset with
        - days           : run length times number of runs, per length and cell
        - missing        : 0.1*N minus the days summed over all lengths,
                           N being the number of time steps
        - missing_events : first histogram entry of every cell
                           (runs longer than the maximum length of the counter)
    '''
    N = len(array.time)
    index = np.arange(N)
    ncells = len(array.ncells)

    # one trial call on the first cell tells how many bins the counter returns
    nbins = len(count_duration(array.isel(ncells=0).values,index))
    hist = np.zeros((nbins,ncells),np.int_)

    for cell in range(ncells):
        hist[:,cell] = count_duration(array.isel(ncells=cell).values,index)

    # bin 0 is the overflow bin, bins 1.. hold the runs of that many days
    length = xr.DataArray(range(1,nbins),dims=('length'))
    events = xr.DataArray(hist[1:,:],dims=('length','ncells'),coords=dict(length=length))
    missing_events = xr.DataArray(hist[0,:],dims='ncells')

    days = length * events

    # 0.1*N: presumably the expected number of hot days for a 90th-percentile threshold - TODO confirm
    missing_days = 0.1*N - days.sum('length')

    return xr.Dataset(dict(days=days,missing=missing_days,missing_events=missing_events))

In [None]:
# Duration statistics for each calendar year, averaged over longitude
persistence = []

for y, g in failing.groupby('time.year'):

    # attach the lat/lon index of the stacked cells, then average over 'lon'
    zonal = (
        loop_cells(g)
        .assign_coords(ncells=failing['ncells'])
        .reset_index('ncells')
        .unstack()
        .mean('lon')
    )

    persistence.append(zonal.assign_coords(year=y))

In [None]:
# Join the per-year results along the 'year' dimension
persistence = xr.concat(persistence, dim='year')

## Significance testing

In [None]:
@numba.guvectorize(
    "(float64[:],float64[:],float64[:,:])",
    "(n), (m) -> (m,n)",
    forceobj=True
)
def random_sample(a,nb,out):
    '''
        Fill out with len(nb) resamples of a, each of length len(a),
        drawn with replacement

        - only the length of nb is used, its values are ignored
    '''
    size = len(a)
    draws = len(nb)

    # uniform variates on [0, size), truncated to integer positions into a
    positions = scipy.stats.uniform.rvs(0,size,size*draws).astype(int)
    out[:,:] = a[positions.reshape(draws,size)]

    
@numba.guvectorize(
    "(float64[:],float64[:],float64[:])",
    "(n), (m) -> (m)",
    forceobj=True
)
def icdf(a,p,out):
    '''
        Empirical quantiles of a: for every fraction in p, the element at
        position int(p*len(a)) of the sorted values

        - p*len(a) has to be a valid position, so p must stay below 1
    '''
    ranked = np.sort(a)
    positions = (p*len(a)).astype(np.int64)
    out[:] = ranked[positions]
    
    
def t_statistic(x1,x2,dim):
    '''
        t statistic of the difference between the means of x1 and x2 along dim

        - both samples are expected to have the same length along dim;
          the length of x1 is used for the standard error
    '''
    n = len(x1[dim])
    mean_difference = x1.mean(dim) - x2.mean(dim)
    standard_error = np.sqrt((x1.var(dim) + x2.var(dim))/n)
    return mean_difference / standard_error


def _resample_years(sample,bootstrap):
    '''
        Resample sample along 'year' with replacement, once per entry of bootstrap
    '''
    return xr.apply_ufunc(random_sample,
                          *(sample,bootstrap),
                          input_core_dims=[['year'],['random']],
                          output_core_dims=[['random','year']],
                          dask='parallelized',
                          output_dtypes=[sample.dtype])


def parametric_bootstrap(sample1,sample2,nb=1000,confid=0.05):
    '''
        Two-sided bootstrap test for a difference between the 'year' means of two samples

        sample1, sample2 : DataArrays with a 'year' dimension of equal length
        nb               : number of bootstrap resamples
        confid           : two-sided significance level

        Both samples are centred on their own mean so that the null hypothesis holds,
        resampled along 'year' with replacement, and the t statistic of every resampled
        pair forms the null distribution.

        Returns a boolean DataArray that is True where the observed t statistic lies
        below the confid/2 quantile or above the 1-confid/2 quantile of that distribution.
    '''
    # Produce control samples that fulfil the null hypothesis
    c1 = sample1 - sample1.mean('year')
    c2 = sample2 - sample2.mean('year')

    # Resample control
    bootstrap = xr.DataArray(np.arange(nb),dims=('random'))
    c1 = _resample_years(c1,bootstrap)
    c2 = _resample_years(c2,bootstrap)

    # t statistic for the resampled data
    null_dist = t_statistic(c1,c2,'year')

    # empirical cumulative distribution function on a grid of 1000 fractions
    p = xr.DataArray(np.linspace(0,0.999,1000),dims=('percentile'))
    null_dist = xr.apply_ufunc(icdf,
                               *(null_dist,p),
                               input_core_dims=[['random'],['percentile']],
                               output_core_dims=[['percentile']],
                               dask='parallelized',
                               output_dtypes=[null_dist.dtype])
    null_dist['percentile'] = p

    # check whether the null hypothesis can be rejected
    t = t_statistic(sample1,sample2,'year')
    lower = null_dist.sel(percentile=confid/2,method='nearest')
    upper = null_dist.sel(percentile=1-confid/2,method='nearest')

    return (t < lower) | (t > upper)

In [None]:
# Day counts per run length from the year-by-year persistence analysis
days = persistence['days']


# Full record and its two halves (label-based slices include both end years)
clim = days.sel(year=slice(1979,2022))
early = days.sel(year=slice(1979,2000))
late = days.sel(year=slice(2001,2022))

In [None]:
# Bootstrap test (5000 resamples) of the early vs. late mean, both taken as anomalies from the full-period mean
sig = parametric_bootstrap(early-clim.mean('year'),late-clim.mean('year'),nb=5000)

## Zonal-mean heatwave frequency

In [None]:
def improved_metrics(files):
    '''
        Zonal-mean heatwave frequency and median heatwave duration per year

        files : paths of JSON tables with the columns 'lat', 'lon', 'start' and
                'length'; the year is taken from the first four characters of 'start'

        Returns a Dataset with
        - events : 'length' summed per (lat, lon, year), averaged over 'lon'
        - days   : median of 'length' per (lat, year)

        - year-round
        - one file for each year
    '''
    zonal_sums = []
    medians = []

    for path in files:
        print(path)

        table = pandas.read_json(path)
        table['year'] = table['start'].apply(lambda stamp: int(stamp[0:4]))

        # NOTE(review): cells without any entry are absent from the table, so they
        # presumably do not enter the mean over 'lon' - confirm this is intended
        per_cell = table.groupby(['lat','lon','year'])['length'].sum()
        zonal_sums.append(per_cell.to_xarray().mean('lon'))

        per_latitude = table.groupby(['lat','year'])['length'].median()
        medians.append(per_latitude.to_xarray())

    frequency = xr.concat(zonal_sums,dim='year')
    length = xr.concat(medians,dim='year')

    return xr.Dataset(dict(events=frequency,days=length))

In [None]:
# All JSON files of the heat_1979-2022 directory, sorted by name
directory = work+'wolfgang/ERA5_surf_day_max/heat_1979-2022/'
files = sorted(directory+f for f in os.listdir(directory) if f.endswith('json'))

In [None]:
# Build the yearly zonal-mean metrics from the JSON files and display the dataset
surf = improved_metrics(files)
surf

In [None]:
# 'events' variable of the metrics dataset; plotted in panel a) of Figure 7
frequency = surf['events']

## Figure 7

In [None]:
# Figure 7: a) heatwave frequency by latitude and year, b) late-minus-early difference of the hot-day counts.
# NOTE: panel b) needs `early`, `late`, `clim`, `days` and `sig` from the significance-testing section;
# the Hovmoeller cells further down rebind `early` and `late`, so run this cell before them.
fig, axes = plt.subplots(nrows=2,ncols=1,figsize=(6,8))

## Panel a): heatwave frequency

C = frequency.plot(ax=axes[0],x='lat',cmap=cmocean.cm.amp,levels=np.arange(0,37),add_colorbar=False)

ylim = axes[0].get_ylim()

# Optional vertical markers at two latitudes (disabled)
#axes[0].plot([-46.5,-46.5],ylim,'-k')
#axes[0].plot([-58.5,-58.5],ylim,'--k')

axes[0].set_ylabel('Year')
axes[0].set_xlabel('Latitude [°N]')
axes[0].set_xticks([-90,-60,-30,0,30,60,90])

cbar1 = plt.colorbar(C,ax=axes[0])
cbar1.set_label('Heatwave frequency [days / year]',fontsize=12)


## Panel b): hot days, difference of the two period means

C = (late.mean('year')-early.mean('year')).plot(ax=axes[1],levels=np.arange(-3,3.25,0.25),add_colorbar=False,extend='both',cmap=cmocean.cm.delta)

# Invisible filled contours that only carry hatching: values below 0.5 (test not passed) are hatched, the rest is left clear
sig.astype('double').plot.contourf(ax=axes[1],levels=[0,0.5,1],hatches=['//',''],alpha=0,add_colorbar=False)

# Prepend a row of zeros at length=-1000 to the full-period mean before contouring it
# (presumably so that the contours close at the lower edge - TODO confirm)
zeros = xr.zeros_like(days['lat']).assign_coords(length=-1000)
C0 = xr.concat([zeros,clim.mean('year')],dim='length').plot.contour(ax=axes[1],x='lat',levels=[1,3,5,7,9,11,13,15,17],colors='w')

plt.clabel(C0,fontsize='x-small')


cbar = plt.colorbar(C,ax=axes[1],orientation='vertical')
cbar.set_label(r'Density difference [year$^{-1}$]',fontsize=13)
cbar.set_ticks([-3,-2,-1,0,1,2,3])

axes[1].set_ylabel('Length [days]',fontsize=13)
axes[1].set_ylim(0.5,14)
axes[1].set_yticks([3,6,9,12])
axes[1].set_yticks([1,2,4,5,7,8,10,11,13,14],minor=True)

# Horizontal reference line at a length of 2.5 days; the x limits are saved and
# restored so that drawing the line does not change the axis range
xlim = axes[1].get_xlim()
axes[1].plot(xlim,[2.5,2.5],linestyle='--',linewidth=1.5,color='m')
axes[1].set_xticks([-90,-60,-30,0,30,60,90])
axes[1].set_xlim(xlim)

axes[1].set_xlabel('Latitude [°N]',fontsize=13)

axes[1].set_title(r'2001-2022 $-$ 1979-2000',weight='bold',fontsize=14)

fig.subplots_adjust(0,0,1,1,0.3,0.3)

# Panel labels, offset from the upper-left corner of each axes by a fixed distance in points
trans = mtransforms.ScaledTranslation(-45/72, -20/72, fig.dpi_scale_trans)

axes[0].text(-0.06,1.06,'a)',transform=axes[0].transAxes+trans,fontsize='large',va='bottom')
axes[1].text(-0.06,1.06,'b)',transform=axes[1].transAxes+trans,fontsize='large',va='bottom')


## Process Hovmöller plots

In [None]:
# Composite for 2001-2022: mean over all onsets stored in the '-58.5N_<year>.nc' files.
# NOTE(review): the 1979-2000 composite reads the '-46.5N_<year>.nc' files instead -
# confirm that the two periods are meant to use different latitudes.
directory = work+'wolfgang/ERA5_surf_day_max/hovmoeller/'

years = range(2001,2023)

# one inner list of matching file paths per year
files = [[directory+f for f in os.listdir(directory) if f.endswith('-58.5N_%d.nc'%y)] for y in years]

ds = xr.open_mfdataset(files,combine='nested',concat_dim=('onset','onset'))

# range(1,13) contains every month, so this filter currently keeps all onsets
late_day = ds['anomaly'].where(ds['onset.month'].isin(range(1,13)),drop=True).mean('onset').compute()

# Optional cubic interpolation along 'step' (disabled)
#late_day = late_day.interp(step=np.arange(-5,10,2/24),method='cubic')

print(files)

In [None]:
# Composite for 1979-2000: mean over all onsets stored in the '-46.5N_<year>.nc' files.
# NOTE(review): the 2001-2022 composite reads the '-58.5N_<year>.nc' files instead -
# confirm that the two periods are meant to use different latitudes.
directory = work+'wolfgang/ERA5_surf_day_max/hovmoeller/'

years = range(1979,2001)

# one inner list of matching file paths per year
files = [[directory+f for f in os.listdir(directory) if f.endswith('-46.5N_%d.nc'%y)] for y in years]

ds = xr.open_mfdataset(files,combine='nested',concat_dim=('onset','onset'))

# The month list covers all twelve months, so this filter currently keeps all onsets.
# NOTE(review): the range is wrapped in a list here, unlike in the 2001-2022 cell;
# presumably equivalent once converted to an array - confirm.
early_day = ds['anomaly'].where(ds['onset.month'].isin([range(1,13)]),drop=True).mean('onset').compute()

# Optional cubic interpolation along 'step' (disabled)
#early_day = early_day.interp(step=np.arange(-5,10,1/24),method='cubic')

print(files)

In [None]:
@numba.guvectorize(
    "(float64[:],int16,complex128[:])",
    "(n), () -> (n)",
    forceobj=True
)
def _hilbert(y,N,out):
    '''
        Analytic signal of y with the FFT-based Hilbert transform technique (Marple, 1999)

        - N is expected to be the length of y
    '''
    # Index of the last positive frequency, depending on even or odd length
    if N%2==0:
        half = int(N/2)
    elif N>1:
        half = int((N-1)/2)
    else:
        half = 0

    spectrum = np.fft.fft(y)

    # Remove the negative frequencies ...
    spectrum[half+1:N] = 0
    # ... and double the remaining ones, except for the 0th coefficient
    spectrum = 2*spectrum
    spectrum[0] = spectrum[0]/2
    if N%2==0:
        # An even-length spectrum contains the Nyquist frequency, which is shared
        # between the positive and negative frequencies and therefore kept only
        # once (see Marple 1999). An odd-length spectrum has no Nyquist frequency.
        spectrum[half] = spectrum[half]/2

    # Back to physical space: the analytic signal
    out[:] = np.fft.ifft(spectrum)
    
    
@numba.vectorize([numba.float64(numba.float64, numba.float64)])
def _rad_diff(a,b):
    '''
        Difference a - b of two phase angles; a result above pi is lowered by 2pi,
        a result below -pi is raised by 2pi
    '''
    delta = a - b
    if delta > np.pi:
        return delta - 2*np.pi
    if delta < -np.pi:
        return delta + 2*np.pi
    return delta
    
    
@numba.guvectorize(
    "(float64[:],float64[:])",
    "(n) -> (n)",
    forceobj=True
)
def _finite_difference(a,out):
    '''
        Difference of a phase series per index step: one-sided differences at both
        ends, centred differences in between; every difference goes through _rad_diff
    '''
    # boundaries
    out[0] = _rad_diff(a[1],a[0])
    out[-1] = _rad_diff(a[-1],a[-2])
    # interior
    out[1:-1] = _rad_diff(a[2:],a[:-2])/2.
    
    
@numba.guvectorize(
    "(float64[:],float64[:],float64[:])",
    "(n), (n) -> (n)",
    forceobj=True
)
def _fft_filter(y,mask,out):
    '''
        Filter y by multiplying its Fourier coefficients with mask

        - mask holds one weight per FFT coefficient, in FFT order
        - out receives the real part of the inverse transform
    '''
    z = scipy.fft.fft(y)
    z = z * mask
    # out is real-valued: take the real part explicitly instead of relying on the
    # implicit complex-to-float cast, which drops the imaginary part with a warning
    out[:] = scipy.fft.ifft(z).real
    
    

In [None]:
def filtering(da,kmin=2,kmax=10,alpha=0.5):
    '''
        Band-pass filter da along 'lon' by masking its FFT coefficients

        da    : DataArray with a 'lon' dimension
        kmin  : index of the first FFT coefficient that is kept
        kmax  : index of the last FFT coefficient that is kept
        alpha : shape parameter of the Tukey window that weights the kept coefficients

        Returns a DataArray shaped like da. Only the coefficients kmin..kmax on the
        positive-frequency side are retained; the result is doubled to make up for
        the discarded negative frequencies.
    '''
    # prepare mask
    N = len(da.lon)
    # The window functions live in scipy.signal.windows; the old scipy.signal.tukey
    # alias is no longer available in recent SciPy releases
    taper = scipy.signal.windows.tukey(kmax-kmin+1,alpha=alpha)
    mask = np.zeros(N)
    mask[kmin:kmax+1] = taper
    mask = xr.DataArray(mask,dims=('freq'))

    # multiply data with mask in spectral space
    filtered = xr.apply_ufunc(_fft_filter,
                              *(da,mask),
                              input_core_dims=[['lon'],['freq']],
                              output_core_dims=[['lon']],
                              dask='parallelized',
                              output_dtypes=[da.dtype]
                             )

    # Since the ignored negative frequencies would contribute the same as the positive ones
    filtered *= 2

    return filtered

def envelope_phase(da):
    '''
        Envelope (absolute value) and phase angle of the complex signal that
        _hilbert returns for da along 'lon'

        Returns a Dataset with the variables 'env' and 'phase'
    '''
    analytic = xr.apply_ufunc(
        _hilbert,
        da,
        len(da.lon),
        input_core_dims=[['lon'],[]],
        output_core_dims=[['lon']],
        dask='parallelized',
        output_dtypes=[np.dtype('complex128')],
    )

    envelope = np.abs(analytic)
    angle = np.arctan2(analytic.imag,analytic.real)

    return xr.Dataset({'env': envelope, 'phase': angle})

In [None]:
# Band-pass the 2001-2022 composite (coefficients 2..15), then take envelope and phase of the filtered field
da = filtering(late_day,kmin=2,kmax=15,alpha=0.5)
ds = envelope_phase(da)
# envelope_phase applied to the envelope itself gives the phase stored as 'group'
group = envelope_phase(ds['env'])

# NOTE: this rebinds `late`, which held the 2001-2022 day counts before this cell
late = xr.Dataset(dict(ta=late_day,wave=ds['phase'],envelope=ds['env'],group=group['phase']))

In [None]:
# Band-pass the 1979-2000 composite (coefficients 2..15), then take envelope and phase of the filtered field
da = filtering(early_day,kmin=2,kmax=15,alpha=0.5)
ds = envelope_phase(da)
# envelope_phase applied to the envelope itself gives the phase stored as 'group'
group = envelope_phase(ds['env'])

# NOTE: this rebinds `early`, which held the 1979-2000 day counts before this cell
early = xr.Dataset(dict(ta=early_day,wave=ds['phase'],envelope=ds['env'],group=group['phase']))

In [None]:
def shift(da):
    '''
        Wrap da into [0, 2pi) and subtract pi, so that an angle of pi maps to 0
    '''
    return np.mod(da,2*np.pi)-np.pi

# Second phase field in which an angle of pi maps to 0, so that a contour at level 0 also traces the phase lines at +/-pi
early['wave_shifted'] = shift(early['wave'])
late['wave_shifted'] = shift(late['wave'])

## Figure 8

In [None]:
# Figure 8: a) 1979-2000 composite, b) 2001-2022 composite, c) zero contours of both periods overlaid
fig, axes = plt.subplots(nrows=3,figsize=(8,10))


# Panel a): shading = temperature anomaly, dotted = zero phase lines of 'wave' and 'wave_shifted',
# solid = zero phase line of 'group', white = envelope. Phase lines are only drawn where the envelope exceeds 0.19.
C = early['ta'].plot(ax=axes[0],levels=np.linspace(-3,3,21),extend='both',add_colorbar=False)

# NOTE(review): the |phase| cut-offs are 3 and 2 here but 1 in panels b) and c) - confirm this is intended
early['wave'].where((early['envelope']>0.19)*(np.abs(early['wave'])<3)).plot.contour(ax=axes[0],levels=[0,],colors='k',linestyles=':',alpha=0.6)
early['wave_shifted'].where((early['envelope']>0.19)*(np.abs(early['wave_shifted'])<2)).plot.contour(ax=axes[0],levels=[0,],colors='k',linestyles=':',alpha=0.6)
early['group'].where(early['envelope']>0.19).plot.contour(ax=axes[0],levels=[0,],colors='k',alpha=0.6)

C0 = early['envelope'].plot.contour(ax=axes[0],levels=[0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6],colors='w',alpha=0.8)

plt.clabel(C0,levels=[0.4,0.8,1.2],fontsize='x-small',colors='k')



# Panel b): same layers for the 2001-2022 composite
late['ta'].plot(ax=axes[1],levels=np.linspace(-3,3,21),extend='both',add_colorbar=False)

late['wave'].where((late['envelope']>0.19)*(np.abs(late['wave'])<1)).plot.contour(ax=axes[1],levels=[0,],colors='k',linestyles=':',alpha=0.6)
late['wave_shifted'].where((late['envelope']>0.19)*(np.abs(late['wave_shifted'])<1)).plot.contour(ax=axes[1],levels=[0,],colors='k',linestyles=':',alpha=0.6)
late['group'].where(late['envelope']>0.19).plot.contour(ax=axes[1],levels=[0,],colors='k',alpha=0.6)

C0 = late['envelope'].plot.contour(ax=axes[1],levels=[0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6],colors='w',alpha=0.8)

plt.clabel(C0,levels=[0.4,0.8,1.2],fontsize='x-small',colors='k')


# Panel c): zero contours only; 2001-2022 (`late`) in blue '#1f77b4' ...
late['wave'].where((late['envelope']>0.19)*(np.abs(late['wave'])<1)).plot.contour(ax=axes[2],levels=[0,],colors='#1f77b4',linestyles=':')
late['wave_shifted'].where((late['envelope']>0.19)*(np.abs(late['wave_shifted'])<1)).plot.contour(ax=axes[2],levels=[0,],colors='#1f77b4',linestyles=':')
late['group'].where(late['envelope']>0.19).plot.contour(ax=axes[2],levels=[0,],colors='#1f77b4',linewidths=1)
late['ta'].plot.contour(ax=axes[2],levels=[0],colors='#1f77b4')

# ... and 1979-2000 (`early`) in orange '#ff7f0e'
early['wave'].where((early['envelope']>0.19)*(np.abs(early['wave'])<1)).plot.contour(ax=axes[2],levels=[0,],colors='#ff7f0e',linestyles=':')
early['wave_shifted'].where((early['envelope']>0.19)*(np.abs(early['wave_shifted'])<1)).plot.contour(ax=axes[2],levels=[0,],colors='#ff7f0e',linestyles=':')
early['group'].where(early['envelope']>0.19).plot.contour(ax=axes[2],levels=[0,],colors='#ff7f0e',linewidths=1)
early['ta'].plot.contour(ax=axes[2],levels=[0],colors='#ff7f0e')


# Empty proxy lines that provide legend handles in the two contour colours of panel c)
l1 = axes[2].plot([],[],color='#1f77b4')
l2 = axes[2].plot([],[],color='#ff7f0e')

# Orange ('#ff7f0e') is the colour of the `early` (1979-2000) contours and blue
# ('#1f77b4') that of the `late` (2001-2022) contours, so the handles are listed
# orange first to match the chronological labels
axes[2].legend([*l2,*l1],['1979-2000','2001-2022'],loc='upper left',fontsize=10)


# Shared cosmetics for all three panels
for ax in axes:
    
    # thin dotted reference lines through y=0 and x=0
    ax.plot(ax.get_xlim(),[0,0],linestyle=':',linewidth=0.5,color='k')
    ax.plot([0,0],ax.get_ylim(),linestyle=':',linewidth=0.5,color='k')

    # clear the automatic labels; the shared ones are set below
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_ylim(-4,10)
    ax.set_xticks([-180,-120,-60,0,60,120,180])
    ax.set_yticks([-4,0,4,8],minor=False)
    
    
axes[0].set_title('1979-2000',weight='bold',fontsize='smaller')
axes[1].set_title('2001-2022',weight='bold',fontsize='smaller')

# one x label on the bottom panel and one y label on the middle panel serve all three
axes[2].set_xlabel('Relative longitude [°E] relative to grid point')
axes[1].set_ylabel('Lag [days] relative to heatwave onset',fontsize=16)

axes[0].set_xticklabels([])
axes[1].set_xticklabels([])


# colorbar shared by the two shaded panels
cbar = plt.colorbar(C,ax=axes[:2],aspect=30)
cbar.set_label('Day max T2m anomaly from 90th percentile')

# give the bottom panel the width of the middle panel, which was changed by the shared colorbar
box = list(axes[2].get_position().bounds)
box[2] = axes[1].get_position().bounds[2]
axes[2].set_position(box)

# Panel labels, offset from the upper-left corner of each axes by a fixed distance in points
trans = mtransforms.ScaledTranslation(-45/72, -20/72, fig.dpi_scale_trans)

axes[0].text(-0.1,1.06,'a)',transform=axes[0].transAxes+trans,fontsize='large',va='bottom')
axes[1].text(-0.1,1.06,'b)',transform=axes[1].transAxes+trans,fontsize='large',va='bottom')
axes[2].text(-0.1,1.06,'c)',transform=axes[2].transAxes+trans,fontsize='large',va='bottom')

