"""This notebook uses the processed files for CAT indices to calculate thresholds for moderate or greater MOG turbulence.
First, we select a mid-lat region for defining MOG.
Then we calculate the threshold or thresholds using quantiles.
Once we have defined a threshold, we can use that value to calculate the frequency of exceeding that threshold.
Frequency above thresholds can be used to evaluate time series and trends over time,
as well as spatial distributions of the strongest areas of turbulence
"""

In [1]:
import xarray as xr
import glob
import intake
import numpy as np
import os

import logging
logging.getLogger("flox").setLevel(logging.WARNING)


In [2]:
from plotting_maps.acs_plotting_maps import plot_acs_hazard_multi, plot_acs_hazard, plot_data, cmap_dict, regions_dict
from matplotlib import colors, cm
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

from dask.diagnostics import ProgressBar
# ProgressBar().register()

import dask
from dask.distributed import Client
# client = Client(threads_per_worker=2, n_workers=14)
# Start a local dask cluster with 7 worker processes of 28 threads each
# (7 x 28 = 196 threads in total).
# NOTE(review): a comment further down mentions a 28-core allocation, so this
# split may oversubscribe the CPUs — confirm the intended worker/thread layout.
client = Client(threads_per_worker=28, n_workers=7)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 7
Total threads: 196,Total memory: 125.19 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:38805,Workers: 7
Dashboard: /proxy/8787/status,Total threads: 196
Started: Just now,Total memory: 125.19 GiB

0,1
Comm: tcp://127.0.0.1:33921,Total threads: 28
Dashboard: /proxy/33617/status,Memory: 17.88 GiB
Nanny: tcp://127.0.0.1:36211,
Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-bs01utfg,Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-bs01utfg

0,1
Comm: tcp://127.0.0.1:32769,Total threads: 28
Dashboard: /proxy/40195/status,Memory: 17.88 GiB
Nanny: tcp://127.0.0.1:35043,
Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-opwb6vxl,Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-opwb6vxl

0,1
Comm: tcp://127.0.0.1:45381,Total threads: 28
Dashboard: /proxy/43761/status,Memory: 17.88 GiB
Nanny: tcp://127.0.0.1:43953,
Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-oigee1jc,Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-oigee1jc

0,1
Comm: tcp://127.0.0.1:33863,Total threads: 28
Dashboard: /proxy/36033/status,Memory: 17.88 GiB
Nanny: tcp://127.0.0.1:38207,
Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-94hero9s,Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-94hero9s

0,1
Comm: tcp://127.0.0.1:40607,Total threads: 28
Dashboard: /proxy/39169/status,Memory: 17.88 GiB
Nanny: tcp://127.0.0.1:43371,
Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-g6k7ofaj,Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-g6k7ofaj

0,1
Comm: tcp://127.0.0.1:34675,Total threads: 28
Dashboard: /proxy/33785/status,Memory: 17.88 GiB
Nanny: tcp://127.0.0.1:44751,
Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-x6pow858,Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-x6pow858

0,1
Comm: tcp://127.0.0.1:38797,Total threads: 28
Dashboard: /proxy/44929/status,Memory: 17.88 GiB
Nanny: tcp://127.0.0.1:33225,
Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-mefy7lux,Local directory: /jobfs/151367043.gadi-pbs/dask-scratch-space/worker-mefy7lux


2025-10-01 18:26:07,718 - distributed.scheduler - ERROR - Task ('array-8474e1f2e31b1d7b092118b8e7d483e9', 0, 0, 0) marked as failed because 4 workers died while trying to run it
2025-10-01 18:26:07,728 - distributed.scheduler - ERROR - Task original-open_dataset-windspeed-26d0f4b58b49e95b448964cf9eee7ba0 marked as failed because 4 workers died while trying to run it
2025-10-01 18:26:07,757 - distributed.scheduler - ERROR - Task ('array-d1512eba5c50ef204e5f0002178d0fa6', 0) marked as failed because 4 workers died while trying to run it
2025-10-01 18:26:07,758 - distributed.scheduler - ERROR - Task ('array-8474e1f2e31b1d7b092118b8e7d483e9', 0, 0, 0) marked as failed because 5 workers died while trying to run it
2025-10-01 18:26:07,760 - distributed.scheduler - ERROR - Task original-open_dataset-windspeed-2b7aacf1fe0a92e91e8060d002fec209 marked as failed because 4 workers died while trying to run it


In [3]:
# Region and baseline period used by the threshold and regional time-series cells.
mid_lat_slice = slice(-50, -25)  # latitude bounds of the mid-latitude band used to define MOG
lon_slice = slice(90, 195)  # longitude bounds of the same box
baseline_time_range = np.arange(1990, 2010)  # years 1990..2009 inclusive

In [4]:
cat_name = "barpa"
col = intake.open_esm_datastore(f"/g/data/lp01/collections/py3.9_dev/nci-{cat_name}.json")

# Edit this cell
var_list = ["ua200", "ua250", "ua300", "va200", "va250", "va300", "ta200", "ta250", "ta300", "zg200", "zg250", "zg300"]
table_id = "6hr"
scenarios = ["historical","ssp126", "ssp370", "ssp585", "evaluation"]

# change this query to select a subset of the data you are interested in
query = dict(variable_id = var_list[0],
             table_id = table_id,
             experiment_id = scenarios,
            )

cat = col.search(**query)
cat.unique()

activity_id                                                    [BARPA-R]
institution_id                                                     [BOM]
version                                                      [v20231001]
variable_id                                                      [ua200]
table_id                                                           [6hr]
source_id              [ACCESS-CM2, ACCESS-ESM1-5, CESM2, CMCC-ESM2, ...
experiment_id           [historical, ssp126, ssp370, ssp585, evaluation]
member_id                      [r4i1p1f1, r6i1p1f1, r11i1p1f1, r1i1p1f1]
grid_label                                                      [AUS-15]
time_range             [196001-196012, 196101-196112, 196201-196212, ...
path                   [/g/data/py18/BARPA/output/CMIP6/DD/AUS-15/BOM...
derived_variable_id                                                   []
dtype: object

In [5]:
# Collapse the catalogue to one row per (variable, experiment, source, member)
# and label each row "<experiment_id>_<source_id>_<member_id>".
group_keys = ["variable_id", "experiment_id", "source_id", "member_id"]
cat_df_max = cat.df.groupby(group_keys).max().reset_index()
cat_df_max["index"] = (
    cat_df_max["experiment_id"] + "_" + cat_df_max["source_id"] + "_" + cat_df_max["member_id"]
)
cat_df_max = cat_df_max.set_index("index")
# cat_df_max


def _labels_for(experiments):
    """Return the row labels of cat_df_max whose experiment_id is in `experiments`."""
    return cat_df_max[cat_df_max["experiment_id"].isin(experiments)].index


# indices for evaluation, historical and future groups. These will share time ranges
i_evaluation = _labels_for(["evaluation"])
i_historical = _labels_for(["historical"])
i_future = _labels_for(["ssp126", "ssp370", "ssp585"])

In [6]:
# "<source_id>_<member_id>" pairs, in the order the run lists below use them.
_gcm_members = [
    "ACCESS-CM2_r4i1p1f1",
    "ACCESS-ESM1-5_r6i1p1f1",
    "CESM2_r11i1p1f1",
    "CMCC-ESM2_r1i1p1f1",
    "EC-Earth3_r1i1p1f1",
    "MPI-ESM1-2-HR_r1i1p1f1",
    "NorESM2-MM_r1i1p1f1",
]

list_evaluation = ["evaluation_BARRA-R_r1i1p1f1", "evaluation_ERA5_r1i1p1f1"]

list_historical = [f"historical_{gcm}" for gcm in _gcm_members]

# 'ssp126_ACCESS-CM2_r4i1p1f1' is skipped on purpose: need to fix this one
list_ssp126 = [f"ssp126_{gcm}" for gcm in _gcm_members[1:]]

list_ssp370 = [f"ssp370_{gcm}" for gcm in _gcm_members]

list_ssp585 = ["ssp585_ACCESS-CM2_r4i1p1f1", "ssp585_EC-Earth3_r1i1p1f1"]

# All future-scenario runs, ordered ssp126, ssp370, ssp585.
list_future = list_ssp126 + list_ssp370 + list_ssp585

In [7]:
%%time
# Candidate MOG thresholds from the ERA5-driven evaluation run.
turbulence_index = "windspeed"


def _preprocess(ds, q=(0.95, 0.99, 0.999)):
    """Reduce one yearly file to its quantiles over the mid-latitude box.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset for a single file, as passed in by ``xr.open_mfdataset``.
    q : sequence of float
        Quantiles to compute (tuple default, so no mutable default is shared between calls).

    Returns
    -------
    xarray.Dataset
        Quantiles taken jointly over time, lat and lon of the selected region.
    """
    region = ds.sel(lat=mid_lat_slice, lon=lon_slice)
    # put each reduced dimension in a single chunk before taking the quantile
    return region.chunk({"time": -1, "lat": -1, "lon": -1}).quantile(q, dim=["time", "lat", "lon"])


for run in ['evaluation_ERA5_r1i1p1f1']:
    experiment_id, source_id, member_id = run.split("_")
    print(run)
    time_range = baseline_time_range

    # one file per baseline year
    filelist = [f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_{year}.nc"
                for year in time_range]

    # each file contributes one entry along the new "time" dimension
    ds = xr.open_mfdataset(filelist, use_cftime=True, preprocess=_preprocess, combine="nested", concat_dim="time")
    ds = ds.compute()

# NOTE: these are the means of the per-year quantiles, not quantiles of the
# pooled baseline period.
# NOTE: the BARRA-R cell that follows assigns the same three names, so when the
# notebook is run top to bottom these ERA5 values are for comparison only.
p95, p99, p999 = ds.mean("time")[turbulence_index].values
p95, p99, p999

evaluation_ERA5_r1i1p1f1
CPU times: user 27.5 s, sys: 5.18 s, total: 32.6 s
Wall time: 1min 21s


(61.930551434713365, 74.4594746594145, 86.42902759651287)

In [8]:
%%time
# Determine threshold MOG from evaluation dataset
turbulence_index = "windspeed"


def _preprocess(ds, q=(0.95, 0.99, 0.999)):
    """Reduce one yearly file to its quantiles over the mid-latitude box.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset for a single file, as passed in by ``xr.open_mfdataset``.
    q : sequence of float
        Quantiles to compute (tuple default, so no mutable default is shared between calls).

    Returns
    -------
    xarray.Dataset
        Quantiles taken jointly over time, lat and lon of the selected region.
    """
    region = ds.sel(lat=mid_lat_slice, lon=lon_slice)
    # put each reduced dimension in a single chunk before taking the quantile
    return region.chunk({"time": -1, "lat": -1, "lon": -1}).quantile(q, dim=["time", "lat", "lon"])


for run in ['evaluation_BARRA-R_r1i1p1f1']:
    experiment_id, source_id, member_id = run.split("_")
    print(run)
    time_range = baseline_time_range

    # one file per baseline year
    filelist = [f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_{year}.nc"
                for year in time_range]

    # each file contributes one entry along the new "time" dimension
    ds = xr.open_mfdataset(filelist, use_cftime=True, preprocess=_preprocess, combine="nested", concat_dim="time")
    ds = ds.compute()

# NOTE: these are the means of the per-year quantiles, not quantiles of the
# pooled baseline period.
# The frequency cells below read the global `p99` assigned here.
p95, p99, p999 = ds.mean("time")[turbulence_index].values
p95, p99, p999

evaluation_BARRA-R_r1i1p1f1
CPU times: user 8.48 s, sys: 1.48 s, total: 9.96 s
Wall time: 40.2 s


(62.835737276077246, 75.76162159729002, 88.1229062187195)

In [9]:
%%time
# Annual frequency of exceeding p99, averaged over the mid-latitude box
filename = f"/home/563/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_ann.nc"
if not os.path.exists(filename):
    turbulence_index = "windspeed"

    def _preprocess(ds):
        """Collapse one yearly file to the fraction of mid-latitude samples above p99."""
        # label the value with the calendar year of the file's first timestamp
        year = ds.isel({"time": 0})["time"].dt.year
        exceeds = ds.sel(lat=mid_lat_slice, lon=lon_slice) > p99
        return exceeds.mean(["time", "lat", "lon"]).assign_coords({"time": year})

    # Set up one not-yet-computed dataset per run.
    delayed_results = {}
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)

        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))

        ds = xr.open_mfdataset(
            filelist,
            use_cftime=True,
            preprocess=_preprocess,
            combine="nested",
            concat_dim="time",
        )
        ds = ds.assign_coords({"run": run})

        delayed_results[run] = ds

    # Calculate results
    results = []
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())

    # then save to netCDF
    ds_results = xr.concat(results, dim="run")

    print("Saving to netcdf")
    ds_results.to_netcdf(filename)
    print(f"Made '{filename}'")
else:
    print(f"File '{filename}' already exists.")

File '/home/563/gt3409/turbulence_AUSCAT/windspeed-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_ann.nc' already exists.
CPU times: user 0 ns, sys: 70 μs, total: 70 μs
Wall time: 74.6 μs


In [10]:
%%time
# Monthly frequency of exceeding p99, averaged over the mid-latitude box
filename = f"/home/563/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_monthly.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    turbulence_index = "windspeed"

    def _preprocess(ds):
        """Calculate the monthly frequency of exceeding the p99 threshold for one file.

        The exceedance flag is averaged over the mid-latitude box and then over
        each calendar month ("ME" resampling).
        """
        exceeds = ds.sel(lat=mid_lat_slice, lon=lon_slice) > p99
        return (
            exceeds.convert_calendar("standard")
            .mean(["lat", "lon"])
            .resample({"time": "ME"})
            .mean(["time"])
        )

    # Set up one not-yet-computed dataset per run.
    delayed_results = {}
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)

        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))

        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested",
                               concat_dim="time").assign_coords({"run": run})

        delayed_results[run] = ds

    # Calculate results
    results = []
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())

    # then save to netCDF
    ds_results = xr.concat(results, dim="run")

    print("Saving to netcdf")
    ds_results.to_netcdf(filename)
    print(f"Made '{filename}'")

File '/home/563/gt3409/turbulence_AUSCAT/windspeed-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_monthly.nc' already exists.
CPU times: user 66 μs, sys: 35 μs, total: 101 μs
Wall time: 86.8 μs


In [11]:
%%time
# Seasonal frequency of exceeding p99, averaged over the mid-latitude box
filename = f"/home/563/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_seasonal.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    #took about 90 min with 28 cores, 28 threads and 7 workers
    turbulence_index = "windspeed"

    def _preprocess(ds):
        """Flag exceedance of the p99 threshold in one file and average it over the box.

        The time axis is kept at full resolution here; the seasonal averaging
        is applied after all files of a run have been concatenated.
        """
        exceeds = ds.sel(lat=mid_lat_slice, lon=lon_slice) > p99
        return exceeds.convert_calendar("standard").mean(["lat", "lon"])

    # Set up one not-yet-computed dataset per run.
    delayed_results = {}
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)

        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))

        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested",
                               concat_dim="time")

        # Resample on the concatenated series (not per file) so that a quarter
        # starting in December can include the following January and February.
        ds = ds.resample({"time": "QS-DEC"}).mean(["time"]).assign_coords({"run": run})

        delayed_results[run] = ds

    # Calculate results
    results = []
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())

    # then save to netCDF
    ds_results = xr.concat(results, dim="run")

    print("Saving to netcdf")
    ds_results.to_netcdf(filename)
    print(f"Made '{filename}'")
                         


File '/home/563/gt3409/turbulence_AUSCAT/windspeed-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_seasonal.nc' already exists.
CPU times: user 75 μs, sys: 39 μs, total: 114 μs
Wall time: 99.2 μs


In [12]:
# for maps

In [13]:
# run = "evaluation_BARRA-R_r1i1p1f1"

# experiment_id, source_id, member_id = run.split("_")
# ds = xr.open_dataset(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_2000.nc")
# ds

In [24]:
%%time
# Annual frequency of exceeding p99 at every grid point (for maps)
filename = f"/home/563/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_ann-mapped.nc"
if not os.path.exists(filename):

    def _preprocess(ds):
        """Calculate the annual frequency of exceeding the p99 threshold per grid point."""
        exceeds = ds.chunk({"time": -1, "lat": 200, "lon": 160}) > p99
        # NaN > p99 evaluates to False, so blank out missing samples to keep
        # them from being counted as non-exceedances in the mean
        masked = exceeds.where(~np.isnan(ds))
        return masked.convert_calendar("standard").resample({"time": "YE"}).mean(["time"])

    # Set up one not-yet-computed dataset per run.
    delayed_results = {}
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)

        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))

        ds = xr.open_mfdataset(
            filelist,
            use_cftime=True,
            preprocess=_preprocess,
            combine="nested",
            concat_dim="time",
        )
        ds = ds.assign_coords({"run": run})

        delayed_results[run] = ds

    # Calculate results
    results = []
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())

    # then save to netCDF
    ds_results_ann = xr.concat(results, dim="run")

    print("Saving to netcdf")
    ds_results_ann.to_netcdf(filename)
    print(f"Made '{filename}'")
else:
    print(f"File '{filename}' already exists.")

evaluation_BARRA-R_r1i1p1f1
evaluation_ERA5_r1i1p1f1
historical_ACCESS-CM2_r4i1p1f1
historical_ACCESS-ESM1-5_r6i1p1f1
historical_CESM2_r11i1p1f1
historical_CMCC-ESM2_r1i1p1f1
historical_EC-Earth3_r1i1p1f1
historical_MPI-ESM1-2-HR_r1i1p1f1
historical_NorESM2-MM_r1i1p1f1
ssp126_ACCESS-ESM1-5_r6i1p1f1
ssp126_CESM2_r11i1p1f1
ssp126_CMCC-ESM2_r1i1p1f1
ssp126_EC-Earth3_r1i1p1f1
ssp126_MPI-ESM1-2-HR_r1i1p1f1
ssp126_NorESM2-MM_r1i1p1f1
ssp370_ACCESS-CM2_r4i1p1f1
ssp370_ACCESS-ESM1-5_r6i1p1f1
ssp370_CESM2_r11i1p1f1
ssp370_CMCC-ESM2_r1i1p1f1
ssp370_EC-Earth3_r1i1p1f1
ssp370_MPI-ESM1-2-HR_r1i1p1f1
ssp370_NorESM2-MM_r1i1p1f1
ssp585_ACCESS-CM2_r4i1p1f1
ssp585_EC-Earth3_r1i1p1f1
evaluation_BARRA-R_r1i1p1f1


2025-10-01 18:24:04,352 - distributed.worker - ERROR - failed during get data with tcp://127.0.0.1:34675 -> tcp://127.0.0.1:38797
Traceback (most recent call last):
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/site-packages/tornado/iostream.py", line 861, in _read_to_buffer
    bytes_read = self.read_from_fd(buf)
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/site-packages/tornado/iostream.py", line 1116, in read_from_fd
    return self.socket.recv_into(buf, len(buf))
TimeoutError: [Errno 110] Connection timed out

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/site-packages/distributed/worker.py", line 1783, in get_data
    response = await comm.read(deserializers=serializers)
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/site-packages/distributed/comm/

In [None]:
%%time
# Monthly frequency of exceeding p99 at every grid point (for maps)
filename = f"/home/563/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_mon-mapped.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:

    def _preprocess(ds):
        """Calculate the monthly frequency of exceeding the p99 threshold per grid point."""
        exceeds = ds.chunk({"time": -1, "lat": 200, "lon": 160}) > p99
        # NaN > p99 evaluates to False, so blank out missing samples to keep
        # them from being counted as non-exceedances in the mean
        masked = exceeds.where(ds.notnull())
        return masked.convert_calendar("standard").resample({"time": "ME"}).mean(["time"])

    # Set up one not-yet-computed dataset per run.
    delayed_results = {}
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)

        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))

        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested",
                               concat_dim="time").assign_coords({"run": run})

        delayed_results[run] = ds

    # Calculate results
    results = []
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())

    # then save to netCDF
    ds_results_mon = xr.concat(results, dim="run")

    print("Saving to netcdf")
    ds_results_mon.to_netcdf(filename)
    print(f"Made '{filename}'")

In [None]:
%%time
# Seasonal frequency of exceeding p99 at every grid point (for maps)
filename = f"/home/563/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_seasonal-mapped.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
    ds_results_seas = xr.open_dataset(filename)
else:
    turbulence_index = "windspeed"

    def _preprocess(ds):
        """Flag exceedance of the p99 threshold per grid point for one file.

        The time axis is kept at full resolution here; the seasonal averaging
        is applied after all files of a run have been concatenated.
        """
        exceeds = ds.chunk({"time": -1, "lat": 200, "lon": 160}) > p99
        # NaN > p99 evaluates to False, so blank out missing samples to keep
        # them from being counted as non-exceedances in the mean
        return exceeds.where(ds.notnull()).convert_calendar("standard")

    # Set up one not-yet-computed dataset per run.
    delayed_results = {}
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)

        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))

        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested",
                               concat_dim="time")

        # Resample on the concatenated series (not per file) so that a quarter
        # starting in December can include the following January and February.
        ds = ds.resample({"time": "QS-DEC"}).mean(["time"]).assign_coords({"run": run})

        delayed_results[run] = ds

    # Calculate results
    results = []
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())

    # then save to netCDF
    ds_results_seas = xr.concat(results, dim="run")

    print("Saving to netcdf")
    # Write the seasonal result computed in this cell (previously `ds_results`,
    # a variable belonging to other cells, was written to this file instead).
    ds_results_seas.to_netcdf(filename)
    print(f"Made '{filename}'")

ds_results_seas

In [None]:
# percentiles


In [None]:
%%time
# Annual percentiles of the index over the whole domain (evaluation runs only)
filename = f"/home/563/gt3409/turbulence_AUSCAT/{turbulence_index}-percentiles_AUS-15_BOM_BARPA-R_v1-r1_6hr_ann.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    turbulence_index = "windspeed"

    def _preprocess(ds):
        """Calculate percentiles 0.00-0.99 (step 0.01) of one file, grouped by year.

        The quantiles are taken jointly over time, lat and lon. No explicit NaN
        mask is applied: masking a dataset with its own not-NaN mask leaves the
        values unchanged, and ``skipna=True`` already ignores missing samples.
        """
        return (
            ds.chunk({"time": -1, "lat": -1, "lon": -1})
            .convert_calendar("standard")
            .resample({"time": "YE"})
            .quantile(
                np.arange(0, 1, 0.01),
                dim=["time", "lat", "lon"],
                method="lower",
                skipna=True,
            )
        )

    # Set up one not-yet-computed dataset per run.
    delayed_results = {}
    for run in list_evaluation: # + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)

        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))

        # NOTE(review): only the first two files are opened — presumably a
        # trial subset; use the full `filelist` for a complete run.
        ds = xr.open_mfdataset(filelist[:2], use_cftime=True,
                               preprocess=_preprocess, combine="nested",
                               concat_dim="time").assign_coords({"run": run})

        delayed_results[run] = ds

    # Calculate results
    results = []
    for run in list_evaluation: # + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())

    # NOTE(review): this reuses the name `ds_results_ann`, which the annual
    # mapped-frequency cell also assigns.
    ds_results_ann = xr.concat(results, dim="run")

    # Saving is disabled, so the existence check above never short-circuits
    # this cell until the lines below are re-enabled.
    # print("Saving to netcdf")
    # ds_results_ann.to_netcdf(filename)
    # print(f"Made '{filename}'")

2025-10-01 18:25:29,532 - distributed.nanny - ERROR - Worker process died unexpectedly
Process Dask Worker process (from Nanny):
Traceback (most recent call last):
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/site-packages/distributed/compatibility.py", line 236, in asyncio_run
    return loop.run_until_complete(main)
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete
    self.run_forever()
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()
  File "/g/data/hh5/public/apps/miniconda3/envs/analysis3-24.04/lib/python3.10/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/g/data/hh5/publ