This notebook uses the processed files for CAT indices to calculate thresholds for moderate or greater MOG turbulence.

First, we select a mid-lat region for defining MOG.

Then we calculate the threshold or thresholds using quantiles.

Once we have defined a threshold, we can use that value to calculate the frequency of exceeding that threshold.

Frequency above thresholds can be used to evaluate time series and trends over time,
as well as spatial distributions of the strongest areas of turbulence

Files with MOG frequency netcdf files are saved for using to make plots



In [1]:
import xarray as xr
import glob
import intake
import numpy as np
import os

import logging
logging.getLogger("flox").setLevel(logging.WARNING)


In [2]:
from plotting_maps.acs_plotting_maps import plot_acs_hazard_multi, plot_acs_hazard, plot_data, cmap_dict, regions_dict
from matplotlib import colors, cm
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

from dask.diagnostics import ProgressBar
# ProgressBar().register()

import dask
from dask.distributed import Client
client = Client(threads_per_worker=5, n_workers=1)
# client = Client(threads_per_worker=4, n_workers=7)

client

  import pynvml
  import pynvml


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 1
Total threads: 5,Total memory: 18.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:33345,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:36507,Total threads: 5
Dashboard: /proxy/43681/status,Memory: 18.00 GiB
Nanny: tcp://127.0.0.1:42725,
Local directory: /jobfs/154942544.gadi-pbs/dask-scratch-space/worker-dehkl5kr,Local directory: /jobfs/154942544.gadi-pbs/dask-scratch-space/worker-dehkl5kr


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-217021' coro=<Client._gather.<locals>.wait() done, defined at /g/data/xp65/public/apps/med_conda/envs/analysis3-25.10/lib/python3.11/site-packages/distributed/client.py:2384> exception=AllExit()>
Traceback (most recent call last):
  File "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.10/lib/python3.11/site-packages/distributed/client.py", line 2393, in wait
    raise AllExit()
distributed.client.AllExit


In [3]:
def suggest_chunking(client, 
                     worker_memory_gb=126/4, 
                     reserve_fraction=0.5, 
                     dtype="float64", 
                     time_steps_per_year=1460):
    # Connect to active Dask client
    cluster_info = client.scheduler_info()

    # Detect number of workers and threads per worker
    num_workers = len(cluster_info["workers"])
    threads_per_worker = list(cluster_info["workers"].values())[0]["nthreads"]
    concurrent_chunks_per_worker = threads_per_worker  # For I/O-bound tasks

    # Calculate safe memory per worker
    usable_mem_bytes = worker_memory_gb * (1 - reserve_fraction) * 1e9
    max_chunk_bytes = usable_mem_bytes / concurrent_chunks_per_worker
    max_chunk_mb = max_chunk_bytes / 1e6

    # Determine bytes per value
    bytes_per_value = 4 if dtype == "float32" else 8
    max_elements = max_chunk_bytes / bytes_per_value
    target_lat_lon_elements = max_elements / time_steps_per_year

    suggested_lat = 436
    suggested_lon = int(target_lat_lon_elements//436)

    # Print results
    print(f"✅ Detected cluster: {num_workers} workers × {threads_per_worker} threads each")
    print(f"✅ Estimated concurrent chunks per worker: {concurrent_chunks_per_worker}")
    print(f"✅ Max safe chunk size: ~{max_chunk_mb:.2f} MB")
    print(f"✅ Suggested chunking: time={time_steps_per_year}, lat={suggested_lat}, lon={suggested_lon}")

# Run the function
suggest_chunking(client)



✅ Detected cluster: 1 workers × 5 threads each
✅ Estimated concurrent chunks per worker: 5
✅ Max safe chunk size: ~3150.00 MB
✅ Suggested chunking: time=1460, lat=436, lon=618


In [4]:
client.scheduler_info()

0,1
Comm: tcp://127.0.0.1:33345,Workers: 1
Dashboard: /proxy/8787/status,Total threads: 5
Started: Just now,Total memory: 18.00 GiB

0,1
Comm: tcp://127.0.0.1:36507,Total threads: 5
Dashboard: /proxy/43681/status,Memory: 18.00 GiB
Nanny: tcp://127.0.0.1:42725,
Local directory: /jobfs/154942544.gadi-pbs/dask-scratch-space/worker-dehkl5kr,Local directory: /jobfs/154942544.gadi-pbs/dask-scratch-space/worker-dehkl5kr
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 88.41 MiB,Spilled bytes: 0 B
Read bytes: 0.0 B,Write bytes: 0.0 B


In [5]:
mid_lat_slice = slice(-50,-25)
lon_slice = slice(90,195)
baseline_time_range = np.arange(1990,2009+1)

turbulence_index = "windspeed"
p95, p99, p999 = (62.835737276077246, 75.76162159729002, 88.1229062187195)

resampler_dict = {"monthly":"ME", "ann":"YE", "seasonal":"QS-DEC"}

In [6]:
list_evaluation = ['evaluation_BARRA-R_r1i1p1f1',
                   'evaluation_ERA5_r1i1p1f1',]

list_historical = ['historical_ACCESS-CM2_r4i1p1f1', 
                   'historical_ACCESS-ESM1-5_r6i1p1f1',
                   'historical_CESM2_r11i1p1f1', 
                   'historical_CMCC-ESM2_r1i1p1f1',
                   'historical_EC-Earth3_r1i1p1f1',
                   'historical_MPI-ESM1-2-HR_r1i1p1f1',
                   'historical_NorESM2-MM_r1i1p1f1',
                  ]

list_ssp126 = [
                 'ssp126_ACCESS-CM2_r4i1p1f1', # need to fix this one
                 'ssp126_ACCESS-ESM1-5_r6i1p1f1',
                 'ssp126_CESM2_r11i1p1f1',
                 'ssp126_CMCC-ESM2_r1i1p1f1',
                 'ssp126_EC-Earth3_r1i1p1f1',
                 'ssp126_MPI-ESM1-2-HR_r1i1p1f1',
                 'ssp126_NorESM2-MM_r1i1p1f1',
              ]

list_ssp370 = ['ssp370_ACCESS-CM2_r4i1p1f1',
                 'ssp370_ACCESS-ESM1-5_r6i1p1f1',
                 'ssp370_CESM2_r11i1p1f1',
                 'ssp370_CMCC-ESM2_r1i1p1f1',
                 'ssp370_EC-Earth3_r1i1p1f1',
                 'ssp370_MPI-ESM1-2-HR_r1i1p1f1',
                 'ssp370_NorESM2-MM_r1i1p1f1',
              ]

list_ssp585 = ['ssp585_ACCESS-CM2_r4i1p1f1',
                 'ssp585_EC-Earth3_r1i1p1f1']

list_future = list_ssp126 + list_ssp370 + list_ssp585

In [7]:
# calculate percentiles /quantiles from original data 

In [8]:
# percentiles


In [37]:
%%time
# annual average distribution
turbulence_index = "windspeed"
for frequency in ["monthly"]:#, "ann", "seasonal"]:
    print(frequency)
    filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-{frequency}-percentiles_AUS-15_BOM_BARPA-R_v1-r1_6hr.nc"
    if os.path.exists(filename):
        print(f"File '{filename}' already exists.")
    else:
        scaling = {"ann":1, "monthly":4, "seasonal":4}[frequency]
        lat_chunksize = int(np.ceil(436/(scaling)))    
        def _preprocess(ds):
            # rechunk such that there are as many chunks as there are years, 
            return ds.dropna("time").astype("float16").resample(time=resampler_dict[frequency]).quantile(np.arange(0,1,0.01), dim="time")
        
        for run in list_evaluation + list_historical:
            experiment_id, source_id, member_id = run.split("_")
            print(run)
            
            run_filename = f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}-{frequency}-percentiles_AUS-15_{run}_BOM_BARPA-R_v1-r1_6hr.nc"
            if os.path.exists(run_filename):
                print(f"File '{run_filename}' already exists.")
            else:
                # wildcard for year
                filelist = [f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_{year}.nc"
                            for year in np.arange(1990, 2009+1)]
                
                ds0=xr.open_dataset(filelist[0])
                ds = xr.open_mfdataset(filelist, 
                                       decode_times=True,
                                       combine="nested", 
                                       concat_dim="time",
                                       preprocess=_preprocess,
                                       chunks = {"time":-1, 
                                                 "lat":lat_chunksize, 
                                                 "lon":-1},
                                      )\
                        .assign_coords({"run":run})\
                        .convert_calendar("standard")\
                        .where(~np.isnan(ds0.isel(time=0)))
        
    
                try:
                    # ds.chunk({"time":-1, "lat":lat_chunksize, "lon":-1})\
                        # .resample(time=resampler_dict[frequency])\
                    ds.to_netcdf(run_filename, compute=True)
                    print(f"Made '{run_filename}'")
                except Exception as e:
                    print(f"Error in {run}: {e}")     

        # print(f"Combine... ")
        # run_filename_list = [f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}-{frequency}-percentiles_AUS-15_{run}_BOM_BARPA-R_v1-r1_6hr.nc"
        #                                 for run in list_evaluation + list_historical]
        # ds_results = xr.open_mfdataset(run_filename_list,
        #                                concat_dim = "run",
        #                                combine="nested",
        #                                )
        # ds_results.to_netcdf(filename)
        # print(f"Made '{filename}'")
    # then delete temp files
    
    # # break out of frequency loop    
    # break
        # # Calculate results
        # results = []
        
        # for run in list_evaluation: # + list_historical + list_future:
        #     print(run)
        #     results.append(delayed_results[run].compute())
        
        # # then save to netCDF
        # ds_results_ann = xr.concat(results, dim="run",)
        
        # # print("Saving to netcdf")
        # # ds_results_ann.to_netcdf(filename)
        # # print(f"Made '{filename}'")
                             
        # ds_results_ann

monthly
evaluation_BARRA-R_r1i1p1f1


ValueError: __resample_dim__ must not be empty

In [13]:
cat_name = "barpa"
col = intake.open_esm_datastore(f"/g/data/lp01/collections/py3.9_dev/nci-{cat_name}.json")

# Edit this cell
var_list = ["ua200", "ua250", "ua300", "va200", "va250", "va300", "ta200", "ta250", "ta300", "zg200", "zg250", "zg300"]
table_id = "6hr"
scenarios = ["historical","ssp126", "ssp370", "ssp585", "evaluation"]

# change this query to select a subset of the data you are interested in
query = dict(variable_id = var_list[0],
             table_id = table_id,
             experiment_id = scenarios,
            )

cat = col.search(**query)
cat.unique()

activity_id                                                    [BARPA-R]
institution_id                                                     [BOM]
version                                                      [v20231001]
variable_id                                                      [ua200]
table_id                                                           [6hr]
source_id              [ACCESS-CM2, ACCESS-ESM1-5, CESM2, CMCC-ESM2, ...
experiment_id           [historical, ssp126, ssp370, ssp585, evaluation]
member_id                      [r4i1p1f1, r6i1p1f1, r11i1p1f1, r1i1p1f1]
grid_label                                                      [AUS-15]
time_range             [196001-196012, 196101-196112, 196201-196212, ...
path                   [/g/data/py18/BARPA/output/CMIP6/DD/AUS-15/BOM...
derived_variable_id                                                   []
dtype: object

In [34]:
# get the list of indices for evaluation, historical and future groups.
cat_df_max = cat.df.groupby(["variable_id", "experiment_id", "source_id", "member_id"]).max().reset_index()
cat_df_max["index"] = [f'{cat_df_max.iloc[i]["experiment_id"]}_{cat_df_max.iloc[i]["source_id"]}_{cat_df_max.iloc[i]["member_id"]}' for i in np.arange(len(cat_df_max))]
cat_df_max = cat_df_max.set_index("index")
# cat_df_max

# indices for evaluation, historical and future groups. These will share time ranges
i_evaluation = cat_df_max.loc[cat_df_max["experiment_id"].isin(["evaluation"])].index
i_historical = cat_df_max.loc[cat_df_max["experiment_id"].isin(["historical"])].index
i_future = cat_df_max.loc[cat_df_max["experiment_id"].isin(["ssp126", "ssp370", "ssp585"])].index

In [2]:
%%time
turbulence_index = "windspeed"
for run in ['evaluation_ERA5_r1i1p1f1']:
    experiment_id, source_id, member_id = run.split("_")
    print(run)
    time_range = baseline_time_range
    
    filelist = [f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_{year}.nc" 
     for year in time_range]
    
    def _preprocess(ds, q=[0.95, 0.99, 0.999],):
        return ds.sel(lat=mid_lat_slice, lon=lon_slice).chunk({"time":-1, "lat":-1, "lon":-1}).quantile(q, dim=["time", "lat", "lon"])

    ds = xr.open_mfdataset(filelist, use_cftime=True, preprocess=_preprocess, combine="nested", concat_dim="time")
    ds = ds.compute()
    
p95, p99, p999 = ds.mean("time")["windspeed"].values
p95, p99, p999

evaluation_ERA5_r1i1p1f1


NameError: name 'baseline_time_range' is not defined

In [40]:
%%time
# Determine threshold MOG from evaluation dataset
turbulence_index = "windspeed"
for run in ['evaluation_BARRA-R_r1i1p1f1']:
    experiment_id, source_id, member_id = run.split("_")
    print(run)
    time_range = baseline_time_range
    
    filelist = [f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_{year}.nc" 
     for year in time_range]
    
    def _preprocess(ds, q=[0.95, 0.99, 0.999],):
        return ds.sel(lat=mid_lat_slice, lon=lon_slice).chunk({"time":-1, "lat":-1, "lon":-1}).quantile(q, dim=["time", "lat", "lon"])

    ds = xr.open_mfdataset(filelist, decode_times=True, preprocess=_preprocess, combine="nested", concat_dim="time")
    ds = ds.compute()
p95, p99, p999 = ds.mean("time")["windspeed"].values
p95, p99, p999

evaluation_BARRA-R_r1i1p1f1
CPU times: user 4.84 s, sys: 2.1 s, total: 6.94 s
Wall time: 1min 40s


(62.835737276077246, 75.76162159729002, 88.1229062187195)

In [43]:
# calculate frequency above thresholds:

In [None]:
%%time
# annual monthly value mapped
# using XXL 28 cores client = Client(threads_per_worker=4, n_workers=2)

frequency = "monthly"
for frequency in ["monthly"]:# , "ann"]:    
    filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-{frequency}-freq-above-p99_mapped_new.nc"
    if os.path.exists(filename):
        print(f"File '{filename}' already exists.")
    else:
        def _preprocess(ds):    
            """Calculate frequency of exceeding p99 threshold"""
            # ds = ds.chunk({"time": -1, "lat": 10, "lon": 40})
            return (ds.dropna("time") > p99).astype("float32").where(~np.isnan(ds)).resample(time=resampler_dict[frequency]).mean("time")
            
        computed_files = []
        for run in list_evaluation + list_historical + list_future:
            file_to_compute = f"/scratch/v46/gt3409/TMP_{turbulence_index}-freq-above-p99/TMP_{turbulence_index}-{frequency}-freq-above-p99_{run}.nc"
            
            if os.path.exists(file_to_compute):
                continue
                print(f"File '{file_to_compute}' already exists.")
            else:
                experiment_id, source_id, member_id = run.split("_")
                print(f"Computing {run}...")
                
                # wildcard for year
                filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))
                
                delayed_results = xr.open_mfdataset(filelist, decode_times=False,
                                                       preprocess=_preprocess, combine="nested", 
                                                       concat_dim="time", 
                                                   chunks={"time": -1, "lat": -1, "lon": -1})\
                                        .assign_coords({"run":run})\
                                        .convert_calendar("standard")
                try:
                    delayed_results.compute().to_netcdf(file_to_compute)
                    print(f"file saved {file_to_compute}")
                except Exception as e:
                    print(f"Error in {run}: {e}")

    
            computed_files.append(file_to_compute)
    
        # open all made files, combine and compute
        ds = xr.open_mfdataset(computed_files,
                           combine="nested",
                           concat_dim="run",
                           ).compute()
        ds.to_netcdf(filename)
        # select mid lat box and compute frequency
        ds.sel(lat=mid_lat_slice, lon=lon_slice)\
          .mean(["lat", "lon"])\
          .to_netcdf(f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-{frequency}-freq-above-p99_midlatbox.nc")
        print(f"Made {filename}")


Computing ssp126_ACCESS-CM2_r4i1p1f1...


In [None]:
%%time
# use client = Client(threads_per_worker=1, n_workers=4, timeout="600s") with 28cores
frequency = "ann"

# annual average value mapped
filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-{frequency}-freq-above-p99_mapped.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    def _preprocess(ds):    
        """Calculate annual frequency of exceeding p99 threshold"""
        ds = ds.chunk({"time": -1, "lat": 10, "lon": 40})
        new_ds = (ds > p99).astype("float32").where(~np.isnan(ds))
        new_ds = new_ds.resample(time=resampler_dict[frequency]).mean("time")
        return new_ds
        
    for run in list_evaluation + list_historical + list_future:
        file_to_compute = f"/scratch/v46/gt3409/TMP_{turbulence_index}-freq-above-p99/TMP_{turbulence_index}-freq-above-p99_{run}.nc"
        # file_to_compute = f"/scratch/v46/gt3409/TMP_{turbulence_index}-freq-above-p99/TMP_{turbulence_index}-{frequency}-freq-above-p99_{run}.nc"
        if os.path.exists(file_to_compute):
            continue
            print(f"File '{file_to_compute}' already exists.")
        else:
            experiment_id, source_id, member_id = run.split("_")
            print(f"Computing {run}...")
            
            # wildcard for year
            filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))
            
            delayed_results = xr.open_mfdataset(filelist, use_cftime=False,
                                                   preprocess=_preprocess, combine="nested", 
                                                   concat_dim="time", )\
                                    .assign_coords({"run":run})\
                                    .convert_calendar("standard")

            delayed_results.compute().to_netcdf(file_to_compute)
            print(f"file saved {file_to_compute}")

    # open all made files, combine and compute
    # ds = xr.open_mfdataset([f"/scratch/v46/gt3409/TMP_{turbulence_index}-freq-above-p99/TMP_{turbulence_index}-{frequency}-freq-above-p99_{run}.nc"
    ds = xr.open_mfdataset([f"/scratch/v46/gt3409/TMP_{turbulence_index}-freq-above-p99/TMP_{turbulence_index}-freq-above-p99_{run}.nc"
                        for run in list_evaluation + list_historical + list_future],
                       combine="nested",
                       concat_dim="run",
                       ).compute()
    ds.to_netcdf(filename)
    # select mid lat box and compute frequency
    ds.sel(lat=mid_lat_slice, lon=lon_slice)\
      .mean(["lat", "lon"])\
      .to_netcdf(f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-{frequency}-freq-above-p99_midlatbox.nc")
    print(f"Made {filename}")


In [None]:
%%time

#seasonal must be different due to  seasonal resampling after open mf

resampler_dict = {"monthly":"ME", "ann":"YE", "seasonal":"QS-DEC"}
frequency = "seasonal"
  
filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-{frequency}-freq-above-p99_mapped.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    def _preprocess(ds):    
        """Calculate frequency of exceeding p99 threshold"""
        return (ds > p99).astype("float32").where(~np.isnan(ds))
        
    computed_files = []
    for run in list_evaluation + list_historical + list_future:
        file_to_compute = f"/scratch/v46/gt3409/TMP_{turbulence_index}-freq-above-p99/TMP_{turbulence_index}-{frequency}-freq-above-p99_{run}.nc"
        
        if os.path.exists(file_to_compute):
            continue
            print(f"File '{file_to_compute}' already exists.")
        else:
            experiment_id, source_id, member_id = run.split("_")
            print(f"Computing {run}...")
            
            # wildcard for year
            filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))
            
            delayed_results = xr.open_mfdataset(filelist, use_cftime=False,
                                                   preprocess=_preprocess, combine="nested", 
                                                   concat_dim="time", 
                                                    chunks={"time": -1, "lat": -1, "lon": -1})\
                                    .chunk({"time": -1,"lat": 4, "lon": -1})\
                                    .resample(time=resampler_dict[frequency]).mean("time")\
                                    .assign_coords({"run":run})\
                                    .convert_calendar("standard")
            try:
                delayed_results.compute().to_netcdf(file_to_compute)
                print(f"file saved {file_to_compute}")
            except Exception as e:
                print(f"Error in {run}: {e}")

        computed_files.append(file_to_compute)

    # open all made files, combine and compute
    ds = xr.open_mfdataset(computed_files,
                       combine="nested",
                       concat_dim="run",
                       ).compute()
    ds.to_netcdf(filename)
    # select mid lat box and compute frequency
    ds.sel(lat=mid_lat_slice, lon=lon_slice)\
      .mean(["lat", "lon"])\
      .to_netcdf(f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-{frequency}-freq-above-p99_midlatbox.nc")
    print(f"Made {filename}")


Computing ssp370_MPI-ESM1-2-HR_r1i1p1f1...
file saved /scratch/v46/gt3409/TMP_windspeed-freq-above-p99/TMP_windspeed-seasonal-freq-above-p99_ssp370_MPI-ESM1-2-HR_r1i1p1f1.nc
Computing ssp370_NorESM2-MM_r1i1p1f1...
file saved /scratch/v46/gt3409/TMP_windspeed-freq-above-p99/TMP_windspeed-seasonal-freq-above-p99_ssp370_NorESM2-MM_r1i1p1f1.nc
Computing ssp585_ACCESS-CM2_r4i1p1f1...
file saved /scratch/v46/gt3409/TMP_windspeed-freq-above-p99/TMP_windspeed-seasonal-freq-above-p99_ssp585_ACCESS-CM2_r4i1p1f1.nc
Computing ssp585_EC-Earth3_r1i1p1f1...


In [None]:
%%time
# annual average value for mid latitudes
filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_ann.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    delayed_results = {}
    
    turbulence_index = "windspeed"
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)
        
        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))
        
        def _preprocess(ds):
            """Calculate annual frequency of exceeding p99 threshold"""
            ds = (ds.sel(lat=mid_lat_slice, lon=lon_slice)>p99)\
                    .mean(["time", "lat", "lon"])\
                    .assign_coords({"time":ds.isel({"time":0})["time"].dt.year})
            
            # ds = (ds.sel(lat=mid_lat_slice, lon=lon_slice)>p99).convert_calendar("standard")\
            #         .mean(["lat", "lon"]).resample({"time":"YE"},).mean(["time"])
            return ds
        
        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested", 
                               concat_dim="time").assign_coords({"run":run})
    
        delayed_results[run] = ds
    
    
    # Calculate results
    results = []
    
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())
    
    # then save to netCDF
    ds_results = xr.concat(results, dim="run",)
    
    print("Saving to netcdf")
    ds_results.to_netcdf(filename)
    print(f"Made '{filename}'")
                         
    ds_results

In [None]:
%%time
# monthly average value for mid latitudes
filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_monthly.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    delayed_results = {}
    
    turbulence_index = "windspeed"
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)
        
        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))
        
        def _preprocess(ds):
            """Calculate annual frequency of exceeding p99 threshold"""
            ds = (ds.sel(lat=mid_lat_slice, lon=lon_slice)>p99)\
                    .convert_calendar("standard")\
                    .mean(["lat", "lon"]).resample({"time":"ME"},).mean(["time"])
            return ds
        
        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested", 
                               concat_dim="time").assign_coords({"run":run})
    
        delayed_results[run] = ds
    
    
    # Calculate results
    results = []
    
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())
    
    # then save to netCDF
    ds_results = xr.concat(results, dim="run",)
    
    print("Saving to netcdf")
    ds_results.to_netcdf(filename)
    print(f"Made '{filename}'")
                         
    ds_results

In [None]:
%%time
# seasonal average value for mid latitudes
filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_seasonal.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    #took about 90 min with 28 cores, 28 threads and 7 workers
    delayed_results = {}
    
    turbulence_index = "windspeed"
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)
        
        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))
        
        def _preprocess(ds):
            """Calculate annual frequency of exceeding p99 threshold"""
            ds = (ds.sel(lat=mid_lat_slice, lon=lon_slice)>p99)\
                    .convert_calendar("standard")\
                    .mean(["lat", "lon"])
            return ds
        
        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested", 
                               concat_dim="time")
        
        ds = ds.resample({"time":"QS-DEC"},).mean(["time"]).assign_coords({"run":run})
    
        delayed_results[run] = ds
    
    
    # Calculate results
    results = []
    
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())
    
    # then save to netCDF
    ds_results = xr.concat(results, dim="run",)
    
    print("Saving to netcdf")
    ds_results.to_netcdf(filename)
    print(f"Made '{filename}'")
                         


In [None]:
# for maps

In [None]:
# run = "evaluation_BARRA-R_r1i1p1f1"

# experiment_id, source_id, member_id = run.split("_")
# ds = xr.open_dataset(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_2000.nc")
# ds

In [None]:
%%time
# monthly average value mapped
filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_mon-mapped.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
else:
    delayed_results = {}
    
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)
        
        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))
        
        def _preprocess(ds):
            """Calculate annual frequency of exceeding p99 threshold"""
            ds = (ds.chunk({"time":-1, "lat":100, "lon":80})>p99)\
                        .where(np.isnan(ds)==False)\
                        .convert_calendar("standard")\
                        .resample({"time":"ME"},).mean(["time"])
            return ds
        
        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested", 
                               concat_dim="time").assign_coords({"run":run})
    
        delayed_results[run] = ds
    
    
    # Calculate results
    results = []
    
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())
    
    # then save to netCDF
    ds_results_mon = xr.concat(results, dim="run",)
    
    print("Saving to netcdf")
    ds_results_mon.to_netcdf(filename)
    print(f"Made '{filename}'")
                         
    ds_results_mon

In [None]:
%%time
# seasonal average values mapped
filename = f"/scratch/v46/gt3409/turbulence_AUSCAT/{turbulence_index}-freq-above-p99_AUS-15_BOM_BARPA-R_v1-r1_6hr_seasonal-mapped.nc"
if os.path.exists(filename):
    print(f"File '{filename}' already exists.")
    ds_results_seas = xr.open_dataset(filename)
else:
    delayed_results = {}
    
    turbulence_index = "windspeed"
    for run in list_evaluation + list_historical + list_future:
        experiment_id, source_id, member_id = run.split("_")
        print(run)
        
        # wildcard for year
        filelist = sorted(glob.glob(f"/scratch/v46/gt3409/TMP_{turbulence_index}/TMP_{turbulence_index}_AUS-15_{source_id}_{experiment_id}_{member_id}_BOM_BARPA-R_v1-r1_6hr_*.nc"))
        
        def _preprocess(ds):
            """Calculate annual frequency of exceeding p99 threshold"""
            ds = (ds.chunk({"time":-1, "lat":100, "lon":80})>p99)\
                    .where(np.isnan(ds)==False)\
                    .convert_calendar("standard")
            return ds
        
        ds = xr.open_mfdataset(filelist, use_cftime=True,
                               preprocess=_preprocess, combine="nested", 
                               concat_dim="time")
        
        ds = ds.resample({"time":"QS-DEC"},).mean(["time"]).assign_coords({"run":run})
    
        delayed_results[run] = ds
    
    
    # Calculate results
    results = []
    
    for run in list_evaluation + list_historical + list_future:
        print(run)
        results.append(delayed_results[run].compute())
    
    # then save to netCDF
    ds_results_seas = xr.concat(results, dim="run",)
    
    print("Saving to netcdf")
    ds_results.to_netcdf(filename)
    print(f"Made '{filename}'")
                         
ds_results_seas