# Process CORDEX indicators

Processing notebook for CORDEX indicators.

In [167]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import rasterio as rio
import rasterio.warp  # make sure the `rio.warp` submodule is loaded
import rioxarray
import xarray as xr

Load the data:

In [137]:
%%time
ds = xr.load_dataset("/workspace/Shared/Tech_Projects/TBEC_CMIP5_Processing/final_products/annual_indices.nc")

CPU times: user 2.15 s, sys: 4.22 s, total: 6.37 s
Wall time: 3min 4s


Define the output directory, lookup tables, null value, and helper functions:

In [144]:
# Directory that receives the output GeoTIFFs. Only the leaf directory is
# created here, so its parent must already exist.
out_dir = Path("/atlas_scratch/kmredilla/cordex_indicator_climatologies")
out_dir.mkdir(exist_ok=True)

# Era name -> "<start year>-<end year>" string (parsed by get_year_slice)
era_lut = dict(
    historical="1976-2005",
    midcentury="2040-2069",
    longterm="2070-2099",
)

# Aggregation label (also used in output file names) -> numpy reduction
aggr_var_lut = dict(
    min=np.min,
    mean=np.mean,
    max=np.max,
)

# Value written to the rasters in place of NaN and declared as their nodata
null_value = -9999

def get_year_slice(era):
    """Build the year slice for an era.

    Args:
        era: key of `era_lut`, e.g. "historical"

    Returns:
        slice whose start and stop are the era's first and last year,
        kept as the strings found in `era_lut` (e.g. slice("1976", "2005"))

    Raises:
        KeyError: if `era` is not a key of `era_lut`
    """
    year_range = era_lut[era]
    # entries look like "1976-2005"; the unpacking requires exactly two parts
    first_year, last_year = year_range.split("-")
    return slice(first_year, last_year)


def write_geotiff(out_fp, arr):
    """Write a 2D array as band 1 of a new raster file.

    The file is created with the settings in the global `PROFILE`
    (driver, CRS, transform, dimensions, dtype, nodata, compression).

    Args:
        out_fp: path of the file to create (opened in write mode)
        arr: array to store in the first band

    Returns:
        None
    """
    with rio.open(out_fp, "w", **PROFILE) as raster:
        raster.write(arr, indexes=1)

Define the expected output CRS and raster profile, and store the profile in the global variable `PROFILE`:

In [174]:
# Source grid: `ds.lon` / `ds.lat` are treated as the cell centres of a
# regular lat/lon grid in EPSG:4326 (the CRS the processing cells assign
# before reprojecting) -- assumes even spacing, TODO confirm.
src_crs = rio.crs.CRS.from_epsg(4326)
lon = ds.lon.values
lat = ds.lat.values

# Cell size in degrees (mean spacing, independent of coordinate order)
dx = abs(lon[-1] - lon[0]) / (lon.size - 1)
dy = abs(lat[-1] - lat[0]) / (lat.size - 1)

# Output CRS of the GeoTIFFs
crs = rio.crs.CRS.from_epsg(3338)

# Derive the output grid (affine transform in EPSG:3338 units plus raster
# dimensions) from the source grid's outer edges, i.e. half a cell beyond the
# outermost cell centres. The previous transform was expressed in degrees
# although the profile declares EPSG:3338, and width/height were hard-coded
# (155 x 130).
# NOTE(review): this should give the same default grid `rio.reproject(3338)`
# produces for the data; confirm the dimensions match the reprojected arrays.
transform, width, height = rio.warp.calculate_default_transform(
    src_crs,
    crs,
    lon.size,
    lat.size,
    left=lon.min() - dx / 2,
    bottom=lat.min() - dy / 2,
    right=lon.max() + dx / 2,
    top=lat.max() + dy / 2,
)

# Creation options shared by every output GeoTIFF (used by write_geotiff)
PROFILE = {
    "driver": "GTiff",
    "crs": crs,
    "transform": transform,
    "width": width,
    "height": height,
    "count": 1,
    "dtype": np.float32,
    "nodata": null_value,
    "tiled": False,
    "compress": "lzw",
    "interleave": "band",
}

Set the `_FillValue` attribute for all data vars to `np.nan` for reprojecting:

In [180]:
# Tag the data variables of the loaded dataset with NaN as their fill value.
# This previously targeted `summary_ds`, which does not exist until the
# processing cells run (NameError on a fresh kernel) and is rebuilt on every
# loop iteration there, so the attribute never reached the reprojection.
# NOTE(review): whether attrs set here survive `.groupby(...).reduce(...)`
# depends on xarray's keep_attrs behaviour -- confirm.
for varname in ds.data_vars:
    ds[varname].attrs["_FillValue"] = np.nan

Process the historical era:


In [186]:
%%time
era = "historical"
scenario = "hist"

for aggr_var in aggr_var_lut:
    aggr_func = aggr_var_lut[aggr_var]
    summary_ds = ds.sel(
        scenario=scenario, year=get_year_slice(era)
    ).groupby("model").reduce(aggr_func, dim="year")
    summary_ds = summary_ds.rio.write_crs(4326).rio.reproject(3338)
    for model in summary_ds.model.values:
        for varname in summary_ds.data_vars:
            arr = summary_ds[varname].sel(model=model).values.round(1)
            arr[np.isnan(arr)] = null_value
            out_fp = out_dir.joinpath(f"cordex_indicators_{varname}_{era}_{model}_{scenario}_{aggr_var}.tif")
            write_geotiff(out_fp, arr)



CPU times: user 5.97 s, sys: 4.28 s, total: 10.3 s
Wall time: 19.3 s


Process future scenarios / eras:

In [192]:
%%time

for era in ["midcentury", "longterm"]:
    for scenario in ["rcp45", "rcp85"]:
        for aggr_var in aggr_var_lut:
            aggr_func = aggr_var_lut[aggr_var]
            summary_ds = ds.sel(
                scenario=scenario, year=get_year_slice(era)
            ).groupby("model").reduce(aggr_func, dim="year")
            for model in summary_ds.model.values:
                for varname in summary_ds.data_vars:
                    arr = summary_ds[varname].sel(model=model).values.round(1)
                    arr[np.isnan(arr)] = null_value
                    out_fp = out_dir.joinpath(f"cordex_indicators_{varname}_{era}_{model}_{scenario}_{aggr_var}.tif")
                    write_geotiff(out_fp, arr)

CPU times: user 16.9 s, sys: 16.7 s, total: 33.7 s
Wall time: 1min 10s


end