Author: Vincent T. Cooper  

Date: 1 October 2021  

Data preprocessing supporting the analysis in JGR Oceans manuscript submission.  

Purpose 1: Take the output from the new run of the Roach et al. (2019) coupled wave-ice model, which uses hourly coupling, and reformat it to be consistent with the earlier daily-coupled run reported in the Roach et al. (2019) paper. This preprocessing lets the analysis code developed before the new run be used unchanged.

Purpose 2: Calculate the distance inside the ice edge for all model output.

Purpose 3: Reduce the model output to the central Beaufort region of interest so that the analysis does not have to load data it never uses. Saving the reduced dataset and loading only that in the analysis notebook is much faster.

VTC note: the original preprocessing was done in waveice_coupled_2021.ipynb; this is a clean version.

In [1]:
import numpy as np
import os
import pandas as pd
import xarray as xr

import matplotlib as mpl
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
# from matplotlib.colors import DivergingNorm
# import matplotlib.patches as patches
%matplotlib inline
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import seaborn as sns; sns.set(color_codes=False)
import cmocean
# from windrose import WindroseAxes
import warnings

import cftime
import datetime


# Plot defaults: keep the bottom/left tick marks visible and set the font.
plt.rcParams.update({
    'xtick.bottom': True,
    'ytick.left': True,
    'font.size': 18,
    'font.family': "Arial",
})
# plt.rcParams['figure.figsize'] = 12,8
# mpl.rcParams['figure.dpi'] = 300 # activate for presentation quality

from sklearn.metrics.pairwise import haversine_distances

## this is a dummy grid that has the right conventions
## It is used only for its grid: `grid.lat` and `grid.lon` are attached to the
## wavewatch output as latitude/longitude coordinates further down.
## NOTE(review): presumably this file has the same 2-D layout as the wave model's
## NY x NX grid -- confirm before swapping in a different reference file.
grid = xr.open_dataset(
    '/glade/work/vcooper/grid_ref/sithick_SImon_CESM2_piControl_r1i1p1f1_gn_110001-120012.nc')

## circle boundary for plotting
# theta = np.linspace(0, 2*np.pi, 100)
# center, radius = [0.5, 0.5], 0.5
# verts = np.vstack([np.sin(theta), np.cos(theta)]).T
# circle = mpl.path.Path(verts * radius + center)

  decode_timedelta=decode_timedelta,


# Define function to calculate distance inside ice edge

In [3]:
## Updated version to 15% ice concentration threshold
def icedistance(iceconc_input, threshold=0.15):
    """Return the distance (km) of every ice-covered cell from the ice edge.

    The ice edge is the set of interior open-water cells (concentration below
    ``threshold``) that have at least one of their four neighbours above
    ``threshold``. Every cell above ``threshold`` is assigned the great-circle
    distance to the nearest edge cell. All other cells keep 0, or NaN where the
    input concentration is NaN.

    Parameters
    ----------
    iceconc_input
        One 2-D snapshot of ice concentration exposing ``.values``,
        ``.latitude.values`` and ``.longitude.values`` (wavewatch naming; CICE
        output would use TLAT/TLON instead). Latitude/longitude are in degrees
        and must be 2-D arrays aligned with the concentration field.
    threshold : float, optional
        Ice-concentration cutoff separating open water from ice. Defaults to
        0.15, the value this analysis has always used.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``iceconc_input.values``.

    Notes
    -----
    * Cells on the outer border of the array are never treated as edge cells,
      and neighbours are not wrapped across the array boundary.
    * If no cell is above ``threshold`` the all-zero/NaN array is returned.
      If there is ice but no edge cell (so no distance can be defined), the ice
      cells are set to NaN rather than raising inside ``haversine_distances``.
    """
    icefracsnp = iceconc_input.values
    lats = iceconc_input.latitude.values
    lons = iceconc_input.longitude.values

    # zeros wherever the input is finite, NaN wherever it is NaN; these values
    # are kept for every cell that does not need a distance calculation
    distances = icefracsnp - icefracsnp

    ##### GET OPEN WATER -> WATER/ICE EDGE LOCATIONS #####

    # open-water cells, skipping the outer border so that all four neighbours
    # exist; +1 converts interior indices back to full-array indices
    rows, cols = np.nonzero(icefracsnp[1:-1, 1:-1] < threshold)
    rows = rows + 1
    cols = cols + 1

    # largest concentration among the four neighbours of each open-water cell
    iceneighbormax = np.nanmax(np.stack((icefracsnp[rows + 1, cols],
                                         icefracsnp[rows, cols - 1],
                                         icefracsnp[rows, cols + 1],
                                         icefracsnp[rows - 1, cols])), axis=0)

    # open-water cells touching ice: distances are measured to these cells
    at_edge = iceneighbormax > threshold
    edge_rows = rows[at_edge]
    edge_cols = cols[at_edge]

    ##### CALCULATION OF DISTANCES #####

    # all cells with ice above the threshold
    icewhere = np.nonzero(icefracsnp > threshold)
    if icewhere[0].size == 0:
        # no ice at all: nothing to measure
        return distances
    if edge_rows.size == 0:
        # ice without any open-water edge: the distance is undefined
        distances[icewhere] = np.nan
        return distances

    # N x 2 matrices of (lat, lon); column_stack stays 2-D even when N == 1,
    # which haversine_distances requires
    wateredgelatlon = np.column_stack((lats[edge_rows, edge_cols],
                                       lons[edge_rows, edge_cols]))
    icelatlon = np.column_stack((lats[icewhere], lons[icewhere]))

    # nearest edge cell for every ice cell; haversine_distances returns
    # radians, so scale by the Earth's radius (m) and convert to km
    mindist = haversine_distances(np.deg2rad(icelatlon),
                                  np.deg2rad(wateredgelatlon)).min(axis=1) * 6371000 / 1000

    distances[icewhere] = mindist
    return distances

# Load new hourly coupled model run and calculate $\Delta ^{dist}$

I loaded each year from 2012-2019 as a separate dataset, then looped through the years, preprocessing each one and saving its distance data to a separate file. The yearly files are concatenated later on.  

First, I'll show how this is done for just the 2012-01-01 data for example, then I'll include the actual loops.

In [4]:
# Directory holding the hourly-coupled run output, and the filename stem shared by
# the WW3 history files (a date string and '.nc' are appended when loading).
hourly_path = '/glade/scratch/bitz/cesm23iws1tsks/run/hourly/'
path_pre_year = f'{hourly_path}cesm23iws1tsks.ww3.hi.'

In [7]:
## open the files for 2012-01-01 only and attach the reference grid's
## lat/lon to them as coordinates
year_sel = '2012'
ww_temp = xr.open_mfdataset(path_pre_year + year_sel + '-01-01*.nc',
                            concat_dim='time', combine='nested')
for coord_name, coord_values in (('latitude', grid.lat.values),
                                 ('longitude', grid.lon.values)):
    ww_temp[coord_name] = (['NY','NX'], coord_values)
ww_temp = ww_temp.set_coords(['latitude','longitude'])

## one slot per time step for the distance field
ice_conc_data = ww_temp.ICE
distances = np.zeros(ice_conc_data.shape)

## the distance routine triggers warnings (div by zero leading to nans);
## silence them while stepping through time
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for i, ice_snapshot in enumerate(ice_conc_data):
        distances[i] = icedistance(ice_snapshot)

## wrap the result in a DataArray carrying the dims/coords of the ice field
distances_da = xr.DataArray(distances,
                            coords=ice_conc_data.coords,
                            dims=ice_conc_data.dims)

## saving is left commented out: this single-day result is only an example and is never used
# new_filename = '/glade/scratch/vcooper/waveice_analysis/distance_calc/cesm23iws1tsks.ww3.hi.icedistance.' + year_sel + '.nc'
# print ('saving to ', new_filename)

# distances_da.to_netcdf(path=new_filename)
# print ('finished saving')

In [8]:
## set up a function to compute distances and saves files all at once
def distance_calc_output(year_sel):
    """Compute the hourly distance-inside-ice-edge field for one year and save it.

    Opens every hourly WW3 history file whose name starts with ``year_sel``,
    runs ``icedistance`` on each time step of ``ICE`` and writes the result to
    ``.../distance_calc/cesm23iws1tsks.ww3.hi.icedistance.<year_sel>.nc``.

    Parameters
    ----------
    year_sel : str or int
        Year to process, e.g. ``'2012'``. It is converted with ``str`` before
        being used in the input glob and the output filename.

    Returns
    -------
    None
        The result is written to disk; progress is printed per time step.
    """
    year_sel = str(year_sel)

    ## set up file loading
    hourly_path = '/glade/scratch/bitz/cesm23iws1tsks/run/hourly/'
    path_pre_year = hourly_path + 'cesm23iws1tsks.ww3.hi.'
    new_filename = '/glade/scratch/vcooper/waveice_analysis/distance_calc/cesm23iws1tsks.ww3.hi.icedistance.' + year_sel + '.nc'

    ## the context manager closes the multi-file dataset on exit, so file handles
    ## do not pile up when this function is called for one year after another
    with xr.open_mfdataset(path_pre_year + year_sel + '*.nc',
                           combine='nested', concat_dim='time') as ww_temp:
        ## add latitude and longitude coords from the reference grid
        ww_temp['latitude'] = (['NY','NX'], grid.lat.values)
        ww_temp['longitude'] = (['NY','NX'], grid.lon.values)
        ww_coords = ww_temp.set_coords(['latitude','longitude'])

        ## initialize distances
        ice_conc_data = ww_coords.ICE
        distances = np.zeros(ice_conc_data.shape)
        n_steps = len(distances)

        ## loop through time
        for i in range(n_steps):
            print(str(i+1) + ' / ' + str(n_steps))
            distances[i] = icedistance(ice_conc_data[i])

        ## convert to xarray, reusing the dims and coords of the ice field
        distances_da = xr.DataArray(distances, dims=ice_conc_data.dims,
                                    coords=ice_conc_data.coords)

        ## save netcdf
        print ('saving to ', new_filename)
        distances_da.to_netcdf(path=new_filename)
        print ('finished saving')

In [None]:
## dont run this, it is here for reference but will take v long time
## yrs has to be 1-D so that each iteration yields a single year. Wrapping the
## range in an extra list makes it shape (1, 8): the loop then runs once and
## str(yr) is '[2012 2013 ... 2019]', which the file glob reads as a character class.
yrs = np.arange(2012, 2019 + 1)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for yr in yrs:
        print(str(yr))
        distance_calc_output(str(yr))

# with this cell done, we have hourly distance inside the ice edge global data
# saved separately for each year 2012-2019

## Now load the hourly model data and save versions grouped by year
The data keep their hourly resolution. This is only an intermediate step, so that I don't have to load and concatenate the hourly files every time I want to do analysis.

### We are working towards creating a single central Beaufort file with all wavewatch and $\Delta ^{dist}$ data 

In [11]:
#### First, need to create time variable bc wavewatch doesn't have actual time data.
def _noleap_hourly_times(year):
    """Return hourly datetime64[ns] stamps for `year` with 29 February removed.

    Every year therefore yields 8760 stamps. The first and last stamp of the
    calendar year and the final number of stamps are printed.
    """
    # every hour of the calendar year; the stop is the next 1 January, so leap
    # years start out with 8784 stamps without hard-coding the hour count
    timetemp = np.arange(f'{year}-01-01T00', f'{year + 1}-01-01T00',
                         dtype='datetime64[h]').astype('datetime64[ns]')
    print(timetemp[0], timetemp[-1])

    ## drop leap days
    timetemp_pd = pd.to_datetime(timetemp)
    is_leap_day = (timetemp_pd.month == 2) & (timetemp_pd.day == 29)
    times = timetemp_pd[~is_leap_day].to_numpy()
    print(times.size)
    return times


time2012 = _noleap_hourly_times(2012)
time2013 = _noleap_hourly_times(2013)
time2014 = _noleap_hourly_times(2014)
time2015 = _noleap_hourly_times(2015)
time2016 = _noleap_hourly_times(2016)
time2017 = _noleap_hourly_times(2017)
time2018 = _noleap_hourly_times(2018)
time2019 = _noleap_hourly_times(2019)

## one continuous hourly axis covering 2012-2019
alltimes_2012_2019 = np.hstack([time2012,time2013,time2014,time2015,
                                time2016,time2017,time2018,time2019])

2012-01-01T00:00:00.000000000 2012-12-31T23:00:00.000000000
8760
2013-01-01T00:00:00.000000000 2013-12-31T23:00:00.000000000
8760
2014-01-01T00:00:00.000000000 2014-12-31T23:00:00.000000000
8760
2015-01-01T00:00:00.000000000 2015-12-31T23:00:00.000000000
8760
2016-01-01T00:00:00.000000000 2016-12-31T23:00:00.000000000
8760
2017-01-01T00:00:00.000000000 2017-12-31T23:00:00.000000000
8760
2018-01-01T00:00:00.000000000 2018-12-31T23:00:00.000000000
8760
2019-01-01T00:00:00.000000000 2019-12-31T23:00:00.000000000
8760


In [None]:
%%time
## each section takes almost 10 minutes to concatenate
## all years share one directory and filename stem, so those are set once up front
hourly_path = '/glade/scratch/bitz/cesm23iws1tsks/run/hourly/'
path_pre_year = hourly_path + 'cesm23iws1tsks.ww3.hi.'

## 2012
temppath = path_pre_year + '2012'
ww2012 = xr.open_mfdataset(temppath + '*.nc', concat_dim='time', combine='nested')
print('done with 12')

## 2013-2016
temppath = path_pre_year + '2013'
ww2013 = xr.open_mfdataset(temppath + '*.nc', concat_dim='time', combine='nested')

temppath = path_pre_year + '2014'
ww2014 = xr.open_mfdataset(temppath + '*.nc', concat_dim='time', combine='nested')

temppath = path_pre_year + '2015'
ww2015 = xr.open_mfdataset(temppath + '*.nc', concat_dim='time', combine='nested')

temppath = path_pre_year + '2016'
ww2016 = xr.open_mfdataset(temppath + '*.nc', concat_dim='time', combine='nested')
print('done with 15 and 16')

## 2017-2019
temppath = path_pre_year + '2017'
ww2017 = xr.open_mfdataset(temppath + '*.nc', concat_dim='time', combine='nested')

temppath = path_pre_year + '2018'
ww2018 = xr.open_mfdataset(temppath + '*.nc', concat_dim='time', combine='nested')

temppath = path_pre_year + '2019'
ww2019 = xr.open_mfdataset(temppath + '*.nc', concat_dim='time', combine='nested')
print('done with 17-19')

In [None]:
## yearly wavewatch datasets paired with the time axes built for them
yearly_ww = (ww2012, ww2013, ww2014, ww2015, ww2016, ww2017, ww2018, ww2019)
yearly_times = (time2012, time2013, time2014, time2015,
                time2016, time2017, time2018, time2019)

## now add time variable to each dataset (assignment modifies each dataset in place)
for ww_year, year_times in zip(yearly_ww, yearly_times):
    ww_year['time'] = year_times

## load the distance data we calculated earlier
dist_prefix = '/glade/scratch/vcooper/waveice_analysis/distance_calc/cesm23iws1tsks.ww3.hi.icedistance.'
tempds12 = xr.open_dataarray(dist_prefix + '2012.nc')
tempds13 = xr.open_dataarray(dist_prefix + '2013.nc')
tempds14 = xr.open_dataarray(dist_prefix + '2014.nc')
tempds15 = xr.open_dataarray(dist_prefix + '2015.nc')
tempds16 = xr.open_dataarray(dist_prefix + '2016.nc')
tempds17 = xr.open_dataarray(dist_prefix + '2017.nc')
tempds18 = xr.open_dataarray(dist_prefix + '2018.nc')
tempds19 = xr.open_dataarray(dist_prefix + '2019.nc')
yearly_dist = (tempds12, tempds13, tempds14, tempds15,
               tempds16, tempds17, tempds18, tempds19)

## give the distance arrays the same time axis as the wavewatch data
for dist_year, year_times in zip(yearly_dist, yearly_times):
    dist_year['time'] = year_times

## i accidentally named this distance in original version, fixed here if i need to redo
## add the distance from ice edge variable to the wavewatch data
for ww_year, dist_year in zip(yearly_ww, yearly_dist):
    ww_year['dist'] = dist_year

In [None]:
## save down all of this hourly data grouped by year so i can load it faster later on
%%time
## save preprocessed dataset separately, then concat them later
## each of these saves takes 2h to 2h30m
new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2012.nc'
print('saving to ', new_filename)

ww2012.to_netcdf(path=new_filename)
ww2012.close()
tempds12.close()
print('finished saving')

new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2013.nc'
print('saving to ', new_filename)

ww2013.to_netcdf(path=new_filename)
ww2013.close()
tempds13.close()
print('finished saving')


##
new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2014.nc'
print('saving to ', new_filename)

ww2014.to_netcdf(path=new_filename)
ww2014.close()
tempds14.close()
print('finished saving')

##
new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2015.nc'
print('saving to ', new_filename)

ww2015.to_netcdf(path=new_filename)
ww2015.close()
tempds15.close()
print('finished saving')

##
new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2016.nc'
print('saving to ', new_filename)

ww2016.to_netcdf(path=new_filename)
ww2016.close()
tempds16.close()
print('finished saving')

##
new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2017.nc'
print('saving to ', new_filename)

ww2017.to_netcdf(path=new_filename)
ww2017.close()
tempds17.close()
print('finished saving')

##
new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2018.nc'
print('saving to ', new_filename)

ww2018.to_netcdf(path=new_filename)
ww2018.close()
tempds18.close()
print('finished saving')

##
new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2019.nc'
print('saving to ', new_filename)

ww2019.to_netcdf(path=new_filename)
ww2019.close()
tempds19.close()
print('finished saving')

In [None]:
%%time
## now load in the data for each year and concatenate a reduced version of the dataset
## that only includes the central beaufort region
## NOTE(review): `latslice` and `beau_mask` used in the loop below are not defined
## anywhere in the visible notebook; they have to exist before this cell is run.
## Presumably they are the index selection along `nj` and the 2-D central Beaufort
## mask from the original preprocessing notebook -- confirm and define them here.
ww2012 = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2012.nc')
ww2013 = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2013.nc')
ww2014 = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2014.nc')
ww2015 = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2015.nc')
ww2016 = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2016.nc')
ww2017 = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2017.nc')
ww2018 = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2018.nc')
ww2019 = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2019.nc')

## keyed by name so the loop below can replace each entry with its reduced version
ww_dict = {'ww2012' : ww2012,
           'ww2013' : ww2013,
           'ww2014' : ww2014,
           'ww2015' : ww2015,
           'ww2016' : ww2016,
           'ww2017' : ww2017,
           'ww2018' : ww2018,
           'ww2019' : ww2019}

## get frequency domain from old data
## (relative path: the file is looked up in the current working directory)
temp_f = xr.open_dataset('./ww1719ef_beau_cat.nc')

## only the values of existing keys are replaced inside the loop, so assigning
## to ww_dict[key] while iterating over its items is safe
for key,val in ww_dict.items():
    print(key)
    
    ## rename vars to match the old model output
    val = val.rename({'UAX': 'uwnd',
                'UAY': 'vwnd',
                'ICE': 'ice',
                'HS':  'hs',
                'T02': 't02',
                'T0M1':'t0m1',
                'T01': 't01',
                'FP0': 'fp',
                'THM': 'dir',
                'EF':  'ef',
                'FREQ':'f',
                'NX':  'ni',
                'NY':  'nj'})

    ## attach the reference grid's lat/lon under the renamed dims and mark them,
    ## together with time, as coordinates
    val['latitude'] = (['nj','ni'],grid.lat.values)
    val['longitude'] = (['nj','ni'],grid.lon.values)
    val = val.set_coords(['time','latitude','longitude'])
    ## take the values of `f` from the old dataset
    val['f'] = temp_f.f
    ## subset along nj before attaching the mask
    ## NOTE(review): beau_mask presumably has the (nj, ni) shape left after this selection -- confirm
    val = val.sel(nj=latslice)
    val.coords['mask'] = (('nj','ni'), beau_mask)
    
    ## eliminate all data outside central beaufort region
    ## (drop=True also trims the coordinate labels that are left with no selected points)
    val = val.where(val.mask > 0, drop=True)
    ww_dict[key] = val

##########################################

In [None]:
%%time
## join the eight reduced yearly datasets along time, in chronological order
ww_allbeau = xr.concat([ww_dict['ww' + str(yr)] for yr in range(2012, 2020)],
                       dim='time')

In [None]:
%%time
## write the combined central beaufort dataset (2012-2019, all wavewatch
## variables plus the distance data) to a single netcdf file
new_filename = '/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2012-2019.beau.nc'
print('saving to ', new_filename)

ww_allbeau.to_netcdf(new_filename)
print('finished saving')

In [None]:
## then can load it and rename it to plug into existing code
ww_allbeau = xr.open_dataset('/glade/scratch/vcooper/waveice_analysis/cesm23iws1tsks.ww3.hi.2012-2019.beau.nc')
## files written by the original preprocessing call the variable 'distance', while
## the cells above now write it as 'dist'. Rename only when the old name is present,
## because renaming a variable that does not exist raises an error.
if 'distance' in ww_allbeau:
    ww_allbeau = ww_allbeau.rename({'distance':'dist'})

## note that when we load in the the 2012-2019 dataset from the new run,
## we still keep the "ww1719" naming convention from when we only had 
## spectral output from 2017-2019. This is annoying because the '1719'
## is a misnomer, but this way the analysis code didnt need to be changed
## with the new run.
## both names point at the same dataset object
ww1719ef_beau_cat = ww_allbeau
ww1719beau_cat = ww_allbeau