In [5]:
import os
import sys
from glob import glob

import cartopy.crs as ccrs
import numpy as np
import xarray as xr

# Local Utils
sys.path.insert(0, "/glade/work/zespinosa/Projects/climate-utils")
from utils import  (
    plot_stationary_sp,
    xarray_monthly_to_time,
    xarray_time_to_monthly,
    detrend_data,
)

In [6]:
"""
Note to self: when scaling memory, we also need to scale the number of processes and the size of the cluster. 
Dask errors are not helpful with this error, so be diligent.
"""
# Start a PBS-backed Dask cluster and attach a client to it.
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# client.shutdown()
cluster = PBSCluster(
    # Job scheduler specific keywords
    project="UWAS0118",
    walltime="06:00:00",
    # queue="economy",
    local_directory="/glade/scratch/zespinosa/",
    # Dask-worker specific keywords
    processes=32,  # 32 Number of Python processes to cut up each job
    # NOTE(review): no unit suffix is given; dask presumably reads a bare "16"
    # as 16 bytes -- confirm the intended size (e.g. "16GB") before changing it.
    memory="16",  # "16"
)
cluster.scale(4)

client = Client(cluster)
# Last expression of the cell: renders the client summary widget.
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 42561 instead


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/42561/status,

0,1
Dashboard: /proxy/42561/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.10.19:37433,Workers: 0
Dashboard: /proxy/42561/status,Total threads: 0
Started: Just now,Total memory: 0 B


UTILS

In [3]:
def detrend_data(data, x, x_dim, deg=1):
    """
    Remove an n-degree least-squares polynomial trend along one dimension.

    NOTE: this definition shadows the `detrend_data` imported from `utils`
    in the imports cell.

    Arguments:
    -----------
        data [Dataset, DataArray](..., x_dim): data to detrend (y)
        x [DataArray](x_dim): coordinate values at which the fitted trend is evaluated (x)
        x_dim [str]: name of dimension along which to detrend
        deg [int]: degree of polynomial to fit

    Returns:
    --------
        [DataArray, Dataset]: `data` minus the fitted polynomial. A DataArray
        input gives a DataArray carrying the input's coords, dims and attrs.
        A Dataset input gives a Dataset in which every variable that has
        `x_dim` is detrended and every other variable is passed through.
    """
    if isinstance(data, xr.Dataset):
        # Dataset.polyfit names its outputs "<var>_polyfit_coefficients", so the
        # single `polyfit_coefficients` lookup below only works per DataArray.
        # Detrend variable by variable instead.
        return data.map(
            lambda var: detrend_data(var, x, x_dim, deg=deg) if x_dim in var.dims else var,
            keep_attrs=True,
        )

    fit = data.polyfit(dim=x_dim, skipna=True, deg=deg)
    trend = xr.polyval(x, fit.polyfit_coefficients)
    detrended = data - trend
    # Re-wrap so the result carries exactly the input's coords, dims and attrs.
    return xr.DataArray(detrended, coords=data.coords, dims=data.dims, attrs=data.attrs)

CESM-NUDGED DAILY ATM DATA FOR STORMS (Grab entire month of Nov. and Dec.): 
1. November 14 and November 21 (Ross Sea)
2. December 21 (Weddell Sea)
VARS: 
- siconc
- 2m temp
- mslp
- 10m wind vectors

CESM-NUDGED SEA ICE THERMO and DYNAMIC TENDENCIES

In [None]:
def load_monthly_tendency_data(futdir, histdir, myvariables, start_year=1985):
    """
    Lazily open monthly history files from two run directories as one dataset.

    Arguments:
    -----------
        futdir [str]: directory with the later files (original comment: 2006 - 2024)
        histdir [str]: directory with the earlier files (original comment: 1950-2005)
        myvariables [list of str]: data variables to keep; every other data
            variable found in the first file is dropped on open
        start_year [int]: first year to load, parsed from the `YYYY-MM` field
            of each file name

    Returns:
    --------
        ds [Dataset]: dask-backed dataset on a standard calendar whose time
            axis is replaced by month stamps from `start_year`-01 through 2024-12

    Raises:
    -------
        FileNotFoundError: if neither directory contains a `*.h.*` file
        ValueError: from xarray if `myvariables` are missing from the first
            file, or if the number of time steps does not match the month axis
    """
    # Get Load Files
    all_files = sorted(
        glob(os.path.join(histdir, "*.h.*"))  # monthly data files 1950-2005
        + glob(os.path.join(futdir, "*.h.*"))  # monthly data files 2006 - 2024
    )
    if not all_files:
        raise FileNotFoundError(f"no '*.h.*' files found in {histdir!r} or {futdir!r}")

    # Collect the names of every variable we do NOT want from one sample file.
    # The context manager closes the file again, and a plain list of names is
    # passed on rather than a view tied to the open dataset.
    with xr.open_dataset(all_files[0]) as probe:
        drop_vars = list(probe.drop_vars(myvariables).data_vars)

    # remove files before start year
    dfiles = [f for f in all_files if int(f.split(".")[-2][:4]) >= start_year]

    ds = xr.open_mfdataset(
        dfiles,
        drop_variables=drop_vars,  # this is absolutely essential (10x faster)
        chunks="auto",
        parallel=True,
        coords="minimal",
        data_vars=myvariables,
    )
    ds = ds.convert_calendar("standard")
    # correct 1 month error: relabel with month stamps. The axis starts at
    # `start_year` so that the parameter and the time axis agree; the end stays
    # fixed at 2024-12, so a length mismatch still raises.
    ds["time"] = np.arange(f"{start_year:04d}-01", "2025-01", dtype="datetime64[M]")

    return ds


def process_tendencies():
    """
    Build and save monthly and detrended fields for the sea-ice tendency variables.

    For each variable in `myvariables` this loads the CESM-nudged ice history
    files, overwrites the TLON/TLAT coordinates with the lon/lat stored in the
    areacello grid file, passes the field through `xarray_time_to_monthly`,
    removes a linear trend along "year", and writes both the detrended and the
    undetrended result to netCDF.

    Returns:
    --------
        None; the results are written to disk.
    """
    myvariables = ["daidtd", "daidtt"]
    outdir = "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data"

    ds_dt = load_monthly_tendency_data(
        futdir="/glade/scratch/wriggles/archive/nudge_era_1950_ens01_21C/ice/hist",
        histdir="/glade/scratch/wriggles/archive/nudge_era_1950_ens01/ice/hist",
        # myvariables=["dvidtd", "dvidtt", "daidtd", "daidtt"]
        myvariables=myvariables,
    )
    # NOTE(review): nj -> "longitude" and ni -> "latitude" -- confirm these are
    # not swapped relative to the model grid's index convention.
    ds_dt = ds_dt.rename({"nj": "longitude", "ni": "latitude"})

    # Fix lat/lon data. The grid file is only needed for these two copies, so
    # open it in a context manager and close it straight away.
    with xr.open_dataset(
        "/glade/work/zespinosa/GRIDS/areacello_fx_CESM1-CAM5_historical_r0i0p0.nc"
    ) as pop_areacello:
        ds_dt.TLON[:] = pop_areacello.areacello.lon.data
        ds_dt.TLAT[:] = pop_areacello.areacello.lat.data

    for cvar in myvariables:
        print(f"starting {cvar}")
        ds_monthly = xarray_time_to_monthly(ds_dt[cvar])
        ds_anoms = detrend_data(ds_monthly, ds_monthly.year, "year", deg=1)
        ds_anoms.to_netcdf(os.path.join(outdir, f"cesm-nudged_{cvar}_anoms_198501-202412.nc"))
        ds_monthly.to_netcdf(os.path.join(outdir, f"cesm-nudged_{cvar}_198501-202412.nc"))

process_tendencies()

CESM-NUDGED OCEAN DATA

In [18]:
# Directories and variable selection read by load_monthly_pop_data() below.
futdir = "/glade/scratch/wriggles/archive/nudge_era_1950_ens01_21C/ocn/hist"
histdir = "/glade/scratch/wriggles/archive/nudge_era_1950_ens01/ocn/hist"
myvariables = ["SST"]

def load_monthly_pop_data():
    """
    Lazily open the monthly ocean history files (1985 onward) as one dataset.

    Reads the module-level `histdir`, `futdir` and `myvariables`.

    Returns:
    --------
        ds_ocn [Dataset]: dask-backed dataset on a standard calendar holding
            only `myvariables`, with the time axis replaced by month stamps
            1985-01 through 2024-12
    """
    # Collect the names of every variable we do NOT want from one sample file.
    # The context manager closes the file again, and a plain list of names is
    # passed on rather than a view tied to the open dataset.
    with xr.open_dataset(
        os.path.join(histdir, "nudge_era_1950_ens01.pop.h.1987-07.nc")
    ) as probe:
        drop_vars = list(probe.drop_vars(myvariables).data_vars)

    dfiles = sorted(
        glob(os.path.join(histdir, "*.h.*"))  # monthly data files 1950-2005
        + glob(os.path.join(futdir, "*.h.*"))  # monthly data files 2006 - 2024
    )
    # remove files before 1985 (year parsed from the YYYY-MM field of the name)
    dfiles = [f for f in dfiles if int(f.split(".")[-2][:4]) >= 1985]

    ds_ocn = xr.open_mfdataset(
        dfiles,
        drop_variables=drop_vars,  # this is absolutely essential (10x faster)
        chunks="auto",
        parallel=True,
        coords="minimal",
        data_vars=myvariables,
    )
    ds_ocn = ds_ocn.convert_calendar("standard")
    # correct 1 month error: relabel every step with a month stamp
    ds_ocn["time"] = np.arange("1985-01", "2025-01", dtype="datetime64[M]")

    return ds_ocn


# Ocean SST: load lazily, pass through xarray_time_to_monthly (the result
# exposes a "year" coordinate), remove a linear trend along "year", then write
# both the detrended and the undetrended fields.
ds_pop = load_monthly_pop_data()
ds_pop_monthly = xarray_time_to_monthly(ds_pop["SST"])
ds_pop_anoms = detrend_data(
    ds_pop_monthly,
    ds_pop_monthly.year,
    "year",
    deg=1,
)
ds_pop_anoms.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "cesm-nudged_sst_anoms_198501-202412.nc"
)
ds_pop_monthly.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "cesm-nudged_sst_198501-202412.nc"
)

CESM-NUDGED ATMOS DATA

In [21]:
# Directories and variable selection read by load_monthly_atm_data() below.
futdir = "/glade/scratch/wriggles/archive/nudge_era_1950_ens01_21C/atm/hist"
histdir = "/glade/scratch/wriggles/archive/nudge_era_1950_ens01/atm/hist"
savedir = "/glade/work/zespinosa/data/CESM_nudged/"
myvariables = ["PSL"]

def load_monthly_atm_data():
    """
    Lazily open the monthly atmosphere history files (1985 onward) as one dataset.

    Reads the module-level `histdir`, `futdir` and `myvariables`.

    Returns:
    --------
        ds_atm [Dataset]: dask-backed dataset on a standard calendar holding
            only `myvariables`, with the time axis replaced by month stamps
            1985-01 through 2024-12
    """
    # Collect the names of every variable we do NOT want from one sample file.
    # The context manager closes the file again, and a plain list of names is
    # passed on rather than a view tied to the open dataset.
    with xr.open_dataset(
        os.path.join(futdir, "nudge_era_1950_ens01_21C.cam.h0.2024-11.nc")
    ) as probe:
        drop_vars = list(probe.drop_vars(myvariables).data_vars)

    dfiles = sorted(
        glob(os.path.join(histdir, "*.h0*"))  # monthly data files 1950-2005
        + glob(os.path.join(futdir, "*.h0*"))  # monthly data files 2006 - 2024
    )
    # remove files before 1985 (year parsed from the YYYY-MM field of the name)
    dfiles = [f for f in dfiles if int(f.split(".")[-2][:4]) >= 1985]

    ds_atm = xr.open_mfdataset(
        dfiles,
        drop_variables=drop_vars,  # this is absolutely essential (10x faster)
        chunks="auto",
        parallel=True,
        coords="minimal",
        data_vars=myvariables,
    )
    ds_atm = ds_atm.convert_calendar("standard")
    # correct 1 month error: relabel every step with a month stamp
    ds_atm["time"] = np.arange("1985-01", "2025-01", dtype="datetime64[M]")

    return ds_atm


# Atmosphere PSL: load lazily, pass through xarray_time_to_monthly (the result
# exposes a "year" coordinate), remove a linear trend along "year", then write
# both the detrended and the undetrended fields.
ds_atm = load_monthly_atm_data()
ds_atm_monthly = xarray_time_to_monthly(ds_atm["PSL"])
ds_atm_anoms = detrend_data(
    ds_atm_monthly,
    ds_atm_monthly.year,
    "year",
    deg=1,
)
ds_atm_anoms.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "cesm-nudged_psl_anoms_198501-202412.nc"
)
ds_atm_monthly.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "cesm-nudged_psl_198501-202412.nc"
)

CESM-NUDGED SICONC DATA

In [23]:
# Open the CESM-nudged ice file lazily: one time step per chunk, with the whole
# nj x ni grid kept together in each chunk.
cesm_ice = xr.open_dataset(
    "/glade/work/zespinosa/Projects/Antarctica_2022/cesm_nudged/ens_01.nc",
    chunks={"time": 1, "nj": -1, "ni": -1},
)

####### SICONC CESM-Nudged Data #######
# NOTE(review): nj -> "longitude" and ni -> "latitude" -- confirm these are not
# swapped relative to the model grid's index convention.
siconcCESM = cesm_ice["aice"].rename({"nj": "longitude", "ni": "latitude"})

# Fix lat/lon data: overwrite TLON/TLAT in place with the lon/lat stored in the
# areacello grid file.
pop_areacello = xr.open_dataset(
    "/glade/work/zespinosa/GRIDS/areacello_fx_CESM1-CAM5_historical_r0i0p0.nc"
)
siconcCESM.TLON[:] = pop_areacello.areacello.lon.data
siconcCESM.TLAT[:] = pop_areacello.areacello.lat.data

# Monthly layout, then linear detrend along "year"; write both versions.
siconcCESM_monthly = xarray_time_to_monthly(siconcCESM)
siconcCESM_anoms = detrend_data(
    siconcCESM_monthly,
    siconcCESM_monthly.year,
    "year",
    deg=1,
)
siconcCESM_anoms.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "cesm_nudged_siconc-anoms_198501-202412.nc"
)
siconcCESM_monthly.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "cesm_nudged_siconc_198501-202412.nc"
)

LOAD NSIDC MONTHLY DATA

In [8]:
# Timely data: NSIDC sea-ice concentration plus the sia/sie dataset.
# NOTE(review): the input file names end in 202210 while the output names below
# end in 202211 -- confirm which range is correct.
siconcObs = xr.open_dataset(
    "/glade/work/zespinosa/data/nsidc/processed/siconc_NSIDC_197901-202210.nc"
).cdr_seaice_conc_monthly
siObs = xr.open_dataset(
    "/glade/work/zespinosa/data/nsidc/processed/sia_sie_NSIDC_197901-202210.nc"
)

# Monthly data
siconcObs_monthly = xarray_time_to_monthly(siconcObs)
siObs_monthly = xarray_time_to_monthly(siObs)

# Linearly detrended concentration anomalies
siconcObs_anoms = detrend_data(siconcObs_monthly, siconcObs_monthly.year, "year", deg=1)
siconcObs_anoms.to_netcdf("/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/nsidc_siconc-anoms_197901-202211.nc")

# Linearly detrended sea-ice-extent anomalies. Save `sieObs_anoms` here, not the
# concentration anomalies, so the sia/sie file holds the extent field.
# NOTE(review): the file name mentions sia as well, but only `sie` is detrended.
sieObs_anoms = detrend_data(siObs_monthly.sie, siObs_monthly.year, "year", deg=1)
sieObs_anoms.to_netcdf("/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/nsidc_sia-sie-anoms_197901-202211.nc")

NSIDC DAILY DATA FOR STORMS (Grab entire month of Nov. and Dec.): 
1. November 14 and November 21 (Ross Sea)
2. December 21 (Weddell Sea)
siconc

ERA5 Monthly SLP Data

In [66]:
# ERA5 monthly SLP: combine the 1979-2021 and 2022 files, keep expver=1, then
# build the monthly layout and the linearly detrended anomalies.
eradir = "/glade/work/zespinosa/data/era5/monthly/MSLP"
dfiles = [
    os.path.join(eradir, fname)
    for fname in ("slp_single_level_1979_2021.nc", "slp_single_level_2022.nc")
]
era5_slp = xr.open_mfdataset(
    dfiles,
    parallel=True,
    chunks={"latitude": -1, "longitude": -1, "time": 1},
).sel(expver=1)

# NOTE(review): `sp` is read here although the directory and file names say
# MSLP/slp -- confirm this is the intended variable.
era5_slp_monthly = xarray_time_to_monthly(era5_slp["sp"])
era5_slp_anoms = detrend_data(
    era5_slp_monthly,
    era5_slp_monthly.year,
    "year",
    deg=1,
)
era5_slp_monthly.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "era5_slp-monthly_197901-202211.nc"
)
era5_slp_anoms.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "era5_slp-anoms_197901-202211.nc"
)

ERA5 Monthly SST Data

In [12]:
# ERA5 monthly SST: combine the 1979-2021 and 2022 files, keep expver=1, then
# build the monthly layout and the linearly detrended anomalies.
eradir = "/glade/work/zespinosa/data/era5/monthly/SST"
dfiles = [
    os.path.join(eradir, fname)
    for fname in ("sst_single_level_1979_2021.nc", "sst_single_level_2022.nc")
]
era5_sst = xr.open_mfdataset(
    dfiles,
    parallel=True,
    chunks={"latitude": -1, "longitude": -1, "time": 1},
).sel(expver=1)

era5_sst_monthly = xarray_time_to_monthly(era5_sst["sst"])
era5_sst_anoms = detrend_data(
    era5_sst_monthly,
    era5_sst_monthly.year,
    "year",
    deg=1,
)
era5_sst_monthly.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "era5_sst-monthly_197901-202211.nc"
)
era5_sst_anoms.to_netcdf(
    "/glade/work/zespinosa/Projects/antarctic-2022_record-low_nudge-analysis/processed_data/"
    "era5_sst-anoms_197901-202211.nc"
)

ERA5 DAILY ATM DATA FOR STORMS (Grab entire month of Nov. and Dec.): 
1. November 14 and November 21 (Ross Sea)
2. December 21 (Weddell Sea)
VARS: 
- 2m temp
- mslp
- 10m wind vectors

Use these spatial plots for sanity checks

In [None]:
def spatial_plot(siconc, title, lon, lat, levels=np.arange(-100, 110, 10), cmap="RdBu"):
    """
    Draw a filled-contour map of a 2-D field with a colorbar (sanity-check plot).

    Arguments:
    -----------
        siconc: field to contour
        title [str]: axes title
        lon: longitudes passed to `contourf` as x
        lat: latitudes passed to `contourf` as y
        levels: contour levels (default -100 to 100 in steps of 10)
        cmap [str]: colormap name

    Returns:
    --------
        None; the figure comes from `plot_stationary_sp()` and is sized to 6x6 inches.
    """
    fig, ax = plot_stationary_sp()
    # lon/lat are handed over as PlateCarree coordinates.
    filled = ax.contourf(
        lon, lat, siconc,
        transform=ccrs.PlateCarree(),
        levels=levels,
        cmap=cmap,
    )
    fig.colorbar(filled, ax=ax)
    ax.set_title(title)
    fig.set_size_inches(6, 6)

In [28]:
def spatial_plot_atm(siconc, title, lon, lat, levels=np.arange(-14, 16, 2), cmap="RdBu"):
    """
    Draw labelled black contour lines of a 2-D field (sanity-check plot).

    Arguments:
    -----------
        siconc: field to contour (the name is kept for compatibility with
            `spatial_plot`)
        title [str]: axes title
        lon: longitudes passed to `contour` as x
        lat: latitudes passed to `contour` as y
        levels: contour levels (default -14 to 14 in steps of 2)
        cmap [str]: accepted for signature parity with `spatial_plot`; not used
            because every line is drawn in black

    Returns:
    --------
        None; the figure comes from `plot_stationary_sp()` and is sized to 6x6 inches.
    """
    fig, ax = plot_stationary_sp()
    img = ax.contour(
        lon,
        lat,
        siconc,
        transform=ccrs.PlateCarree(),
        levels=levels,
        colors=["black"],
        # The contour keyword is the plural `negative_linestyles`; the singular
        # spelling is not a contour parameter.
        negative_linestyles="dashed",
    )
    ax.clabel(img, img.levels, inline=True, fontsize=14)
    ax.set_title(title)
    fig.set_size_inches(6, 6)