In [None]:
import xarray
import cmocean as cm
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from pathlib import Path
from xmovie import Movie
import matplotlib.path as mpath
import cartopy.feature as cft
import numpy as np
import intake 
import sys  

from pathlib import Path

# Silence Python warnings for the rest of the session.
# The environment variable is set in addition to the in-process filter below —
# presumably so that processes started later from this kernel inherit the
# setting; TODO confirm that is the intent.
from os import environ
environ["PYTHONWARNINGS"] = "ignore"

import warnings
import xarray as xr
# Blanket filter: hides every warning category raised in this kernel.
warnings.filterwarnings("ignore") # Suppress warnings for these docs

In [None]:
# Open a COSIMA Cookbook session on a local database file.
import cosima_cookbook as cc
# Database path is relative, so it resolves against the current working directory.
db = 'local_cc_scratch_online_full_cycle.db'
session = cc.database.create_session(db)

In [None]:
%%time
# Index the model output directory into the database behind `session`
# so it can be queried with cc.querying later on.
directory_list=['/g/data/e14/rmh561/access-om2/archive/025deg_jra55_iaf_cycle6_online_mlt']
cc.database.build_index(directory_list, session)

In [None]:
# Start a local dask.distributed client for the computations in this notebook.
from dask.distributed import Client

# One thread per worker.
client = Client(threads_per_worker=1)
# Bare expression so the notebook displays the client summary.
client

# SST - Create Zarr Stores

In [4]:
# Open the Cookbook session used for the SST query and pick the variable name.
import cosima_cookbook as cc

# NOTE(review): this is a different database file from the
# 'local_cc_scratch_online_full_cycle.db' that is indexed earlier in the
# notebook — confirm this one is populated with the same experiment.
db = 'local_cc_scratch_online_update.db'
session = cc.database.create_session(db)

# Variable name passed to cc.querying.getvar.
var = 'sst'

In [20]:
# Daily-mean SST from the cycle-6 experiment, restricted to 1982-2011 and to
# one latitude/longitude sector, with the horizontal axes renamed to lat/lon.
data = (
    cc.querying.getvar(
        expt='025deg_jra55_iaf_cycle6_online_mlt',
        variable=var,
        session=session,
        frequency='1 daily',
        attrs={'cell_methods': 'time: mean'},
    )
    .sel(time=slice('1982', '2011'))
    # Whole time axis in a single chunk; horizontal axes tiled 128 (y) x 120 (x).
    .chunk({'time': -1, 'xt_ocean': 120, 'yt_ocean': 128})
    # Keep yt_ocean in [-82, -50] and xt_ocean in [20, 80].
    .sel(yt_ocean=slice(-82, -50))
    .sel(xt_ocean=slice(20, 80))
    .rename({'xt_ocean': 'lon', 'yt_ocean': 'lat'})
)

In [21]:
# Subtract 273.15 from every value — presumably converting Kelvin to degrees
# Celsius; TODO confirm the units of the source variable.
sst = data - 273.15
# Bare expression so the notebook shows the lazy array summary.
sst

Unnamed: 0,Array,Chunk
Bytes,2.61 GiB,642.01 MiB
Shape,"(10957, 266, 240)","(10957, 128, 120)"
Dask graph,6 chunks in 89 graph layers,6 chunks in 89 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.61 GiB 642.01 MiB Shape (10957, 266, 240) (10957, 128, 120) Dask graph 6 chunks in 89 graph layers Data type float32 numpy.ndarray",240  266  10957,

Unnamed: 0,Array,Chunk
Bytes,2.61 GiB,642.01 MiB
Shape,"(10957, 266, 240)","(10957, 128, 120)"
Dask graph,6 chunks in 89 graph layers,6 chunks in 89 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [22]:
#Write SST to a zarr datastore to prevent memory blowing up when doing thresholds
sst.to_zarr('/g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/SST_Zarr_Stores/025deg_jra55_iaf_cycle6_online_mlt_zarr_20_to_80_lat_m50_to_m82.zarr')

<xarray.backends.zarr.ZarrStore at 0x149f8a6ebac0>

In [10]:
# Re-open a previously written SST store to inspect its contents.
# NOTE(review): this path (…_m280_to_m190_…) is not the store written in the
# preceding cell (…_20_to_80_…) — confirm which sector is meant to be checked.
ds_input = xr.open_dataset('/g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/SST_Zarr_Stores/025deg_jra55_iaf_cycle6_online_mlt_zarr_m280_to_m190_lat_m50_to_m82.zarr')
print(ds_input)

<xarray.Dataset> Size: 4GB
Dimensions:  (lat: 266, lon: 360, time: 10957)
Coordinates:
  * lat      (lat) float64 2kB -81.08 -80.97 -80.87 ... -50.46 -50.3 -50.14
  * lon      (lon) float64 3kB -279.9 -279.6 -279.4 ... -190.6 -190.4 -190.1
  * time     (time) datetime64[ns] 88kB 1982-01-01T12:00:00 ... 2011-12-31T12...
Data variables:
    sst      (time, lat, lon) float32 4GB ...


# SST - Create Climatologies

In [27]:
# `os` must be imported in this cell: nothing earlier in the notebook binds the
# name (only `environ` is imported from it), so on a fresh kernel the chdir
# below would raise NameError.
import os

# Move into the toolbox directory so the local module can be imported by name.
os.chdir('/g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/MHW_Toolbox')
import bipolarMhwToolBox as MHW

In [28]:
import xarray as xr
import glob
import os

# For every SST Zarr store, compute the climatology with the MHW toolbox and
# write it to a NetCDF file of the same base name. Stores whose output file
# already exists are skipped, so the cell is safe to re-run.

# Input and output directories
zarr_dir = '/g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/SST_Zarr_Stores/'  
output_dir = '/g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/Climatology'

os.makedirs(output_dir, exist_ok=True)

# Get all matching Zarr store paths
zarr_stores = glob.glob(os.path.join(zarr_dir, '025deg_jra55_iaf_cycle6_online_mlt_zarr_*.zarr'))

for zarr_path in zarr_stores:
    # Construct output filename from the store name (without the '.zarr' suffix)
    base_name = os.path.basename(zarr_path).replace('.zarr', '').replace('rechunked_', 'clim_rechunked_')
    print(base_name)
    output_path = os.path.join(output_dir, f'{base_name}.nc')

    # Skip if already processed
    if os.path.exists(output_path):
        print(f'Skipping (already exists): {output_path}')
        continue

    print(f'Processing: {zarr_path}')

    # Open the store with the whole time axis in a single chunk
    ds_input = xr.open_zarr(zarr_path).chunk(dict(time=-1))
    print(ds_input)

    # Compute climatologies
    Seas = MHW.smoothedClima_mhw(ds_input.sst)

    # Persist result before writing. persist() returns a new object rather
    # than modifying in place, so the result has to be rebound; otherwise the
    # persisted data is dropped and to_netcdf() recomputes everything.
    Seas = Seas.persist()

    # Write to NetCDF
    print(f'Writing to: {output_path}')
    Seas.to_netcdf(output_path)

025deg_jra55_iaf_cycle6_online_mlt_zarr_m100_to_m40_lat_m50_to_m82
Processing: /g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/SST_Zarr_Stores/025deg_jra55_iaf_cycle6_online_mlt_zarr_m100_to_m40_lat_m50_to_m82.zarr
<xarray.Dataset> Size: 3GB
Dimensions:  (lat: 266, lon: 240, time: 10957)
Coordinates:
  * lat      (lat) float64 2kB -81.08 -80.97 -80.87 ... -50.46 -50.3 -50.14
  * lon      (lon) float64 2kB -99.88 -99.62 -99.38 ... -40.62 -40.38 -40.12
  * time     (time) datetime64[ns] 88kB 1982-01-01T12:00:00 ... 2011-12-31T12...
Data variables:
    sst      (time, lat, lon) float32 3GB dask.array<chunksize=(10957, 128, 120), meta=np.ndarray>
<xarray.Dataset> Size: 3GB
Dimensions:  (lat: 266, lon: 240, time: 10957)
Coordinates:
  * lat      (lat) float64 2kB -81.08 -80.97 -80.87 ... -50.46 -50.3 -50.14
  * lon      (lon) float64 2kB -99.88 -99.62 -99.38 ... -40.62 -40.38 -40.12
  * time     (time) datetime64[ns] 88kB 1982-01-01T12:00:00 ... 2011-12-31T12...
Data variables

# SST - Create Thresholds

In [40]:
import glob
import os

import xarray as xr

# For each SST Zarr store, derive the smoothed threshold with the MHW toolbox
# and save it as NetCDF. Existing output files are left untouched.

# Source stores and destination folder
zarr_dir = '/g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/SST_Zarr_Stores'
output_dir = '/g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/Thresholds90thPercentile'
os.makedirs(output_dir, exist_ok=True)

# Collect every store that matches the experiment naming pattern
store_pattern = os.path.join(zarr_dir, '025deg_jra55_iaf_cycle6_online_mlt_zarr_*.zarr')
zarr_stores = glob.glob(store_pattern)

for zarr_path in zarr_stores:
    # Output file shares the store's name, minus the '.zarr' suffix
    store_name = os.path.basename(zarr_path)
    base_name = store_name.replace('.zarr', '').replace('rechunked_', 'clim_rechunked_')
    output_path = os.path.join(output_dir, f'{base_name}.nc')

    if os.path.exists(output_path):
        # Already written on an earlier run
        print(f'Skipping (already exists): {output_path}')
    else:
        print(f'\nProcessing: {zarr_path}')

        # Load lazily with the full time axis in one chunk
        ds_input = xr.open_zarr(zarr_path).chunk({'time': -1})
        print(f'Dataset loaded with variables: {list(ds_input.data_vars)}')

        # Threshold from the toolbox, persisted before it is written out
        Thresh = MHW.smoothedThresh_mhw(ds_input.sst)
        Thresh = Thresh.persist()

        print(f'Writing to: {output_path}')
        Thresh.to_netcdf(output_path)


Processing: /g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/SST_Zarr_Stores/025deg_jra55_iaf_cycle6_online_mlt_zarr_m100_to_m40_lat_m50_to_m82.zarr
Dataset loaded with variables: ['sst']
Writing to: /g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/Thresholds90thPercentile/025deg_jra55_iaf_cycle6_online_mlt_zarr_m100_to_m40_lat_m50_to_m82.nc

Processing: /g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/SST_Zarr_Stores/025deg_jra55_iaf_cycle6_online_mlt_zarr_m280_to_m220_lat_m50_to_m82.zarr
Dataset loaded with variables: ['sst']
Writing to: /g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/Thresholds90thPercentile/025deg_jra55_iaf_cycle6_online_mlt_zarr_m280_to_m220_lat_m50_to_m82.nc

Processing: /g/data/e14/ts7341/Analysis_Using_Daily_JRA/JRA_SST_Processed/SST_Zarr_Stores/025deg_jra55_iaf_cycle6_online_mlt_zarr_m40_to_20_lat_m50_to_m82.zarr
Dataset loaded with variables: ['sst']
Writing to: /g/data/e14/ts7341/Analysis_Using_Daily_JRA/