# Process NCAR 12km AK indicators

Processing notebook for "NCAR indicators", derived from the 12km NCAR BCSD dataset over Alaska.

In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import rasterio as rio
import xarray as xr
import rioxarray
from pyproj import Proj, Transformer, CRS
from wrf import PolarStereographic

Define the output directory, lookup tables, constants, and a small helper function used throughout the notebook:

In [186]:
# Directory that receives the GeoTIFFs written by the processing cells below.
out_dir = Path("/atlas_scratch/kmredilla/ncar12km_indicators")
# exist_ok=True lets this cell be re-run without failing once the directory exists.
out_dir.mkdir(exist_ok=True)

# Era name -> "start-end" year range (split on "-" by get_year_slice).
era_lut = dict(
    historical="1980-2009",
    midcentury="2040-2069",
    longterm="2070-2099",
)

# Statistic label (also used in output file names) -> NumPy reducer
# applied along the "year" dimension.
aggr_var_lut = dict(min=np.min, mean=np.mean, max=np.max)

# Value written in place of NaN and registered as nodata on each raster.
null_value = -9999

# Output file name template; the processing cells fill the fields in the order
# variable, era, model, scenario, statistic.
temp_out_fp = "ncar12km_indicators_{}_{}_{}_{}_{}.tif"

def get_year_slice(era):
    """Return a slice covering the year range of the named era.

    Args:
        era: key of ``era_lut`` (e.g. "historical").

    Returns:
        ``slice(first, last)`` where both bounds are the year strings found
        on either side of the "-" in the era's lookup entry.

    Raises:
        KeyError: if ``era`` is not a key of ``era_lut``.
    """
    year_range = era_lut[era]
    first, last = year_range.split("-")
    return slice(first, last)


Load the data:

In [2]:
%%time
# xr.load_dataset reads the file into memory rather than opening it lazily.
ds = xr.load_dataset("/atlas_scratch/kmredilla/ncar12km_indicators.nc")

CPU times: user 1.82 s, sys: 3.79 s, total: 5.6 s
Wall time: 7.22 s


Project the dataset (i.e. make the x and y coordinates match the projection):

In [21]:
# Build the WRF polar stereographic projection and a plain lon/lat WGS84 one.
wrf_proj_str = PolarStereographic(**{"TRUELAT1": 64, "STAND_LON": -150}).proj4()
wrf_proj = Proj(wrf_proj_str)
wgs_proj = Proj(proj='latlong', datum='WGS84')
# CRS object for the same projection string; required by write_crs at the
# end of this cell.
wrf_crs = CRS.from_proj4(wrf_proj_str)

# Projected coordinates of the point (lon=-150, lat=64); the offsets below
# treat this point as the centre of the grid.
transformer = Transformer.from_proj(wgs_proj, wrf_proj)
e, n = transformer.transform(-150, 64)
# Grid parameters: cell size in projection units (presumably metres, i.e. 12 km)
dx, dy = 12000, 12000

# longitude is a 2-D array; its shape gives the number of rows and columns
ny, nx = ds.longitude.shape

# Lower-left corner of the domain (centre of the first cell)
x0 = -(nx-1) / 2. * dx + e
y0 = -(ny-1) / 2. * dy + n
# 1-D coordinate vectors along each axis
x = np.arange(nx) * dx + x0
y = np.arange(ny) * dy + y0

# Replace the 2-D latitude/longitude variables with projected 1-D x/y
# coordinates, then attach the CRS so rioxarray can reproject the data.
proj_ds = ds.assign_coords({"y": ("y", y), "x": ("x", x)}).drop_vars(["latitude", "longitude"])
crs_ds = proj_ds.rio.write_crs(wrf_crs)

Iterate over models and scenarios and reproject using rioxarray. Rioxarray can only reproject data with up to three dimensions, so for each model and scenario we first subset the dataset, then reproject it, summarize by min/mean/max over each era, and write the results to GeoTIFFs.

Start with Daymet, since this is a standalone case (i.e. no other models / scenarios):

In [212]:
%%time
era = "historical"
scenario = "historical"
model = "daymet"

reproj_ds = crs_ds.sel(model=model, scenario=scenario).rio.reproject(3338)
for aggr_var in aggr_var_lut:
    aggr_func = aggr_var_lut[aggr_var]
    summary_ds = reproj_ds.sel(
        year=get_year_slice(era)
    ).reduce(aggr_func, dim="year")
    for varname in summary_ds.data_vars:
        out_da = summary_ds[varname]
        out_da.data = summary_ds[varname].data.round(1)
        out_da.data[np.isnan(summary_ds[varname].data)] = null_value
        out_da = out_da.rio.write_nodata(null_value)
        out_fp = out_dir.joinpath(temp_out_fp.format(varname, era, model, scenario, aggr_var))
        out_da.rio.to_raster(out_fp, dtype=np.float32)

CPU times: user 3.01 s, sys: 596 ms, total: 3.61 s
Wall time: 3.68 s


Then process projected data by iterating over remaining models / scenarios:

In [214]:
%%time

for model in ["CCSM4", "MRI-CGCM3"]:
    for scenario in ["rcp45", "rcp85"]:
        for era in ["midcentury", "longterm"]:
            reproj_ds = crs_ds.sel(model=model, scenario=scenario).rio.reproject(3338)
            for aggr_var in aggr_var_lut:
                aggr_func = aggr_var_lut[aggr_var]
                summary_ds = reproj_ds.sel(
                    year=get_year_slice(era)
                ).reduce(aggr_func, dim="year")
                for varname in summary_ds.data_vars:
                    out_da = summary_ds[varname]
                    out_da.data = summary_ds[varname].data.round(1)
                    out_da.data[np.isnan(summary_ds[varname].data)] = null_value
                    out_da = out_da.rio.write_nodata(null_value)
                    out_fp = out_dir.joinpath(temp_out_fp.format(varname, era, model, scenario, aggr_var))
                    out_da.rio.to_raster(out_fp, dtype=np.float32)

CPU times: user 25.2 s, sys: 4.5 s, total: 29.7 s
Wall time: 30.7 s


end