In [None]:
from pathlib import Path

import numpy as np
import dask
from dask.distributed import Client
from tqdm.auto import tqdm

import prep_dataset
import compute_degree_days
import reproject
from config import DATA_DIR, OUTPUT_DIR, daymet_dir, reprojected_dir
from config import models, scenarios, metrics, unit_tag

In [None]:
# Lookup table mapping each degree-day metric name to the function in
# compute_degree_days that computes it.
metric_dispatch = {
    "air_freezing_index": compute_degree_days.compute_cumulative_freezing_index,
    "air_thawing_index": compute_degree_days.compute_cumulative_thawing_index,
    "heating_degree_days": compute_degree_days.compute_cumulative_heating_degree_days,
    "degree_days_below_zero": compute_degree_days.compute_cumulative_degree_days_below_0F,
}

In [None]:
# Daymet baseline runs 1980 through 2017, so there should be 38 files.
# The paths are sorted because glob order is filesystem-dependent; sorting
# keeps the processing order the same from run to run.
daymet_files = sorted(daymet_dir.glob("*.nc"))
assert len(daymet_files) == 38, (
    f"expected 38 Daymet files in {daymet_dir}, found {len(daymet_files)}"
)

In [None]:
# We know from our EDA work that this model is missing some data, so it is
# dropped from the list of models to process.
try:
    models.remove("HadGEM2-ES")
except ValueError:
    # The model is not in the list (e.g. this cell was already run once in
    # this kernel). Only this case is ignored; any other error should surface.
    pass

# Gather every file matching "*.nc*" found recursively under each model's directory.
projected_model_files = []
for model in models:
    projected_model_files.extend((DATA_DIR / model).rglob("*.nc*"))

# One file is expected per model, per scenario, per year. The original note
# says there are nine models with two scenarios each, and these data run
# 1950 through 2099 (150 years).
expected_projected_count = len(models) * len(scenarios) * 150
assert len(projected_model_files) == expected_projected_count, (
    f"expected {expected_projected_count} projected files under {DATA_DIR}, "
    f"found {len(projected_model_files)}"
)

In [None]:
%%time
# create dask client
client = Client()

# run the pipeline for daymet first because it has a different structure (no scenarios)

for src_file in tqdm(daymet_files):
    year = int(src_file.name.split('_')[-1].split('.')[0])
    daily_avg_temp_F_ds, raster_creation_profile = prep_dataset.prep_ds(src_file)
    
    for degree_day_metric in metrics:
        result = metric_dispatch[degree_day_metric](daily_avg_temp_F_ds)
        # write the initial GeoTIFF
        reproject.write_raster_to_disk(OUTPUT_DIR / f"daymet_historical_{degree_day_metric}_{year}.tif",
                                       raster_creation_profile,
                                       np.flipud(result.compute())
                                      )

for src_file in tqdm(projected_model_files):
    year = int(src_file.name.split('_')[-1].split('.')[0])
    # get model and scenario
    model_name = src_file.name.split("_")[0]
    scenario_name = src_file.name.split("_")[1]    
    daily_avg_temp_F_ds, raster_creation_profile = prep_dataset.prep_ds(src_file)
    
    for degree_day_metric in metrics:
        result = metric_dispatch[degree_day_metric](daily_avg_temp_F_ds)
        # write the initial GeoTIFF
        reproject.write_raster_to_disk(OUTPUT_DIR / f"{model_name}_{scenario_name}_{degree_day_metric}_{year}.tif",
                                       raster_creation_profile,
                                       np.flipud(result.compute())
                                      )

client.close()

In [None]:
# Collect every GeoTIFF currently present in the output directory.
geotiff_fps = [*OUTPUT_DIR.glob("*.tif")]

# One raster is expected for each (source file, metric) combination.
assert len(geotiff_fps) == (len(daymet_files) + len(projected_model_files)) * len(metrics)

In [None]:
# Reproject every GeoTIFF collected in geotiff_fps. Each file becomes one
# dask.delayed call to reproject.reproject_raster(file, "ncar_12km"), and all
# of the calls are computed together.
# The context manager closes the dask client even if a reprojection fails.
with Client() as client:
    reprojection_tasks = [
        dask.delayed(reproject.reproject_raster)(f, "ncar_12km") for f in geotiff_fps
    ]
    _ = dask.compute(*reprojection_tasks)

In [None]:
# The reprojected directory should hold exactly as many GeoTIFFs as were
# collected in geotiff_fps.
reproj_geotiff_fps = [*reprojected_dir.glob("*.tif")]
assert len(geotiff_fps) == len(reproj_geotiff_fps)