# AK spruce beetle outbreak risk pipeline

This notebook is used for creating a climate-based dataset of spruce beetle outbreak risk.

It is currently under development.

Set up path variables and slurm variables:

In [1]:
from pathlib import Path


# directory handed to the risk script as --met_dir
met_dir = Path("/Data/Base_Data/Climate/AK_NCAR_12km/met")
# filename template with three "{}" placeholders; handed to the risk script
# unformatted as --tmp_fn
tmp_fn = "{}_{}_BCSD_met_{}.nc4"
# scratch space for the per-job risk files and the slurm scripts / logs
scratch_dir = Path("/atlas_scratch/kmredilla/beetles")
slurm_dir = scratch_dir.joinpath("slurm")
# create the slurm directory (and any missing parents) if it is not there yet
slurm_dir.mkdir(exist_ok=True, parents=True)

# slurm job settings written into each sbatch script header

slurm_email = "kmredilla@alaska.edu"
partition = "main"
# init script for conda on compute nodes (sourced before `conda activate`)
conda_init_script = "/home/UA/kmredilla/conda_init.sh"
conda_env_name = "py39"
# used for both --cpus-per-task in the sbatch header and --ncpus of the script
ncpus = 32

# path to the python script that each slurm job executes
compute_yearly_risk = "/workspace/UA/kmredilla/spruce-beetle-risk/compute_yearly_risk.py"

Create a Slurm sbatch script for each model, scenario, and era combination. Each job occupies one node, reads the data in parallel, and writes a yearly risk dataset to a temporary file. Define a function to write this script:

In [2]:
def write_sbatch_yearly_risk(
    slurm_email,
    partition,
    conta_init_script,
    conda_env_name,
    sbatch_fp,
    sbatch_out_fp,
    compute_yearly_risk,
    met_dir,
    tmp_fn,
    era,
    model,
    scenario,
    ncpus,
    risk_fp
):
    """Write an sbatch script that runs the yearly risk script for a single
    model / scenario / era combination.

    Args:
        slurm_email (str): address slurm mails on job failure
        partition (str): slurm partition to submit the job to
        conta_init_script (str or path-like): script sourced in the job so that
            `conda activate` works. The name is a misspelling of
            "conda_init_script" that is kept so the signature stays unchanged.
        conda_env_name (str): conda environment activated before running python
        sbatch_fp (str or path-like): path the sbatch script is written to
        sbatch_out_fp (str or path-like): value for the `#SBATCH --output` line
        compute_yearly_risk (str or path-like): path to the python script to run
        met_dir (str or path-like): forwarded as --met_dir
        tmp_fn (str): forwarded unformatted as --tmp_fn
        era (str): forwarded as --era
        model (str): forwarded as --model
        scenario (str): forwarded as --scenario
        ncpus (int): used for `--cpus-per-task` and forwarded as --ncpus
        risk_fp (str or path-like): forwarded as --risk_fp

    Returns:
        None, the script text is written to sbatch_fp
    """
    # Every value is interpolated exactly once with f-strings. Running
    # str.format over an already interpolated header would re-interpret any
    # braces contained in the values themselves.
    sbatch_head = (
        "#!/bin/sh\n"
        "#SBATCH --nodes=1\n"
        f"#SBATCH --cpus-per-task={ncpus}\n"
        "#SBATCH --mail-type=FAIL\n"
        f"#SBATCH --mail-user={slurm_email}\n"
        f"#SBATCH -p {partition}\n"
        f"#SBATCH --output {sbatch_out_fp}\n"
        # print start time
        "echo Start slurm && date\n"
        # prepare shell for using activate - Chinook requirement
        # (uses the argument, not a same-named notebook global)
        f"source {conta_init_script}\n"
        f"conda activate {conda_env_name}\n"
    )

    # blank line between the header and the single python invocation
    pycommands = (
        "\n"
        f"python {compute_yearly_risk} "
        f"--met_dir {met_dir} "
        f"--tmp_fn {tmp_fn} "
        f"--era {era} "
        f"--model {model} "
        f"--scenario {scenario} "
        f"--ncpus {ncpus} "
        f"--risk_fp {risk_fp}\n\n"
    )

    with open(sbatch_fp, "w") as f:
        f.write(sbatch_head + pycommands)


## Process CMIP5 yearly risk dataset

Build sbatch files:

In [4]:
import luts


# eras that are not processed in this run
excluded_eras = ["2040-2099"]

sbatch_fps = []
risk_fps = []
for model in luts.models:
    for scenario in luts.scenarios:
        for era in luts.eras:
            if era in excluded_eras:
                continue

            # shared stem for the script, its log, and the output file
            job_name = f"{model}_{scenario}_{era}"
            sbatch_fp = slurm_dir / f"yearly_risk_{job_name}.slurm"
            # slurm writes the job log next to the script
            sbatch_out_fp = str(sbatch_fp).replace(".slurm", "_%j.out")
            # temporary filepath for yearly data array
            risk_fp = scratch_dir / f"{job_name}.nc"

            write_sbatch_yearly_risk(
                slurm_email, partition, conda_init_script, conda_env_name,
                sbatch_fp, sbatch_out_fp,
                compute_yearly_risk, met_dir, tmp_fn,
                era, model, scenario,
                ncpus, risk_fp,
            )

            # remember what was written so the jobs can be submitted and
            # their outputs collected afterwards
            sbatch_fps.append(sbatch_fp)
            risk_fps.append(risk_fp)

In [39]:
# remove existing output files if desired
# (plain loop: the deletions are side effects, there is no result to collect)
for stale_out_fp in slurm_dir.glob("*.out"):
    stale_out_fp.unlink()

In [5]:
import subprocess


def submit_sbatch(sbatch_fp):
    """Hand a slurm script to `sbatch` and report the resulting job id.

    Args:
        sbatch_fp (pathlib.PosixPath): path to .slurm script to submit

    Returns:
        job id for submitted job, taken as the last space-delimited token of
        the text printed by sbatch
    """
    sbatch_cmd = ["sbatch", str(sbatch_fp)]
    # check_output returns bytes; drop newlines before picking the last token
    stdout_text = subprocess.check_output(sbatch_cmd).decode().replace("\n", "")

    return stdout_text.rsplit(" ", 1)[-1]

Submit the sbatch jobs:

In [6]:
# submit every script in the order it was written, keeping the job ids
job_ids = list(map(submit_sbatch, sbatch_fps))

Once all submitted jobs have finished and written their output files, read in the temporary DataArrays and combine them:

In [10]:
import xarray as xr


# open the per-job output files, then merge them along their coordinates
risk_das = list(map(xr.open_dataarray, risk_fps))
risk_da = xr.combine_by_coords(risk_das)

Save to a single file on Poseidon:

In [12]:
# destination for the combined yearly risk DataArray
out_fp = "/workspace/Shared/Tech_Projects/beetles/final_products/yearly_risk.nc"

# write the combined DataArray to a single netCDF file
risk_da.to_netcdf(out_fp)

## Process Daymet yearly risk dataset

In [8]:
import compute_yearly_risk as main


# Daymet-specific arguments for main.process_risk_components
# filename template with two "{}" placeholders
dm_tmp_fn = "{}_met_{}.nc"
dm_era = "1990-2017"
dm_model = "daymet"
# presumably the number of worker processes — confirm against compute_yearly_risk
dm_ncpus = 32

In [9]:
# Called directly in the notebook kernel rather than through a slurm job.
# NOTE(review): met_dir is the same directory used for the CMIP5 jobs —
# confirm the Daymet files are located there as well.
daymet_risk_comp_ds = main.process_risk_components(
    met_dir, dm_tmp_fn, dm_era, dm_model, dm_ncpus
)

In [13]:
# write the Daymet risk components to a netCDF file in the scratch directory
daymet_out_fp = scratch_dir / "yearly_risk_components_daymet.nc"
daymet_risk_comp_ds.to_netcdf(daymet_out_fp)