# Model Calibration

This notebook is for calibrating the model. It extracts data from the yearly risk dataset created in `./pipeline.ipynb` for the locations and years for which we have observed data, and also provides visualization utilities.

In [86]:
from pathlib import Path


# Filesystem locations used by this notebook.
# out_dir is the directory holding the final products; the yearly risk
# file is resolved relative to it.
out_dir = Path("/workspace/Shared/Tech_Projects/beetles/final_products")
yearly_risk_fp = out_dir / "yearly_risk.nc"

Load the yearly risk DataArray:

In [3]:
import xarray as xr


# Open the yearly risk file at yearly_risk_fp as a single xarray DataArray.
# NOTE(review): presumably a NetCDF file given the ".nc" suffix — confirm.
risk_da = xr.open_dataarray(yearly_risk_fp)

Define the locations and years we are interested in:

In [6]:
import numpy as np


# Lookup of calibration sites, keyed by site name. Each entry holds:
#   "years":  list of consecutive years to extract (both bounds below inclusive)
#   "latlon": (latitude, longitude) tuple in degrees; negative longitude = west
locations_lu = {
    name: {"years": list(range(first_year, last_year + 1)), "latlon": latlon}
    for name, first_year, last_year, latlon in (
        ("Talkeetna", 2012, 2017, (62.3209, -150.1066)),
        ("Fairbanks", 2014, 2019, (64.8401, -147.7200)),
        ("King Salmon", 2012, 2018, (58.6887, -156.6628)),
        ("Delta Jct", 2014, 2019, (64.0401, -145.7344)),
    )
}

Define a function to extract the risk values from the dataset:

In [77]:
def extract_risk_df(risk_da, lat, lon, years):
    """Extract a dataframe of risk values for the grid cell nearest a location.

    Args:
        risk_da: risk DataArray carrying 2-D "latitude" and "longitude"
            coordinates laid out on (y, x), plus a "year" coordinate.
        lat: latitude of the point of interest, in the same units as
            risk_da["latitude"].
        lon: longitude of the point of interest, in the same convention as
            risk_da["longitude"].
        years: year labels to select.

    Returns:
        pandas.DataFrame with a "risk" column and one column per remaining
        dimension of the selection ("x" and "y" are dropped).

    Raises:
        ValueError: if every grid cell has a NaN coordinate, so no nearest
            cell exists (raised by np.nanargmin).
    """
    # get the y and x array positions of the cell closest to the requested
    # WGS84 coordinates (plain Euclidean distance in degree space)
    # NOTE(review): degree-space distance ignores longitude compression at
    # high latitudes, so it is only an approximation of true distance.
    dist_arr = np.sqrt(
        np.square(risk_da["longitude"] - lon) + np.square(risk_da["latitude"] - lat)
    ).values
    # nanargmin yields exactly one minimum: equidistant ties can no longer
    # expand the selection to several cells, and cells with NaN coordinates
    # are skipped instead of turning the minimum into NaN.
    yidx, xidx = np.unravel_index(np.nanargmin(dist_arr), dist_arr.shape)

    # yidx / xidx are positions, so they must go through isel; years are
    # labels, so they go through sel. One-element lists keep x and y as
    # length-1 dimensions so they appear as columns that can be dropped below.
    cell_da = risk_da.isel(y=[int(yidx)], x=[int(xidx)]).sel(year=years)

    risk_df = (
        cell_da.drop_vars(["latitude", "longitude"])
        .to_dataframe("risk")
        .reset_index()
        .drop(columns=["x", "y"])
    )

    return risk_df

Apply the function and create a complete dataframe of extracted values for all desired locations and years:

In [95]:
import pandas as pd


# Build one dataframe of extracted risk values per location, then combine.
risk_df_list = []
# start of iteration over locations
for location, site in locations_lu.items():
    years = site["years"]
    lat, lon = site["latlon"]
    # translate longitude to the (0, 360) degree scale. The modulo gives the
    # same result as adding 360 for negative (western) longitudes and leaves
    # values already on that scale unchanged, whereas an unconditional
    # "+ 360" would push non-negative longitudes out of range.
    lon %= 360
    temp_df = extract_risk_df(risk_da, lat, lon, years)
    temp_df["location"] = location
    risk_df_list.append(temp_df)

# concat keeps each per-location frame's own index, so index labels repeat
# across locations in the combined frame
risk_df = pd.concat(risk_df_list)
# move "location" to the front, selecting it by name rather than assuming
# it is the last column
risk_df = risk_df[["location"] + [col for col in risk_df.columns if col != "location"]]

In [96]:
# Display the combined risk dataframe as this cell's output
risk_df

Unnamed: 0,location,model,scenario,snow,year,risk
0,Talkeetna,CCSM4,rcp45,low,2012,0.0
1,Talkeetna,CCSM4,rcp45,low,2013,0.0
2,Talkeetna,CCSM4,rcp45,low,2014,0.0
3,Talkeetna,CCSM4,rcp45,low,2015,0.0
4,Talkeetna,CCSM4,rcp45,low,2016,0.0
...,...,...,...,...,...,...
139,Delta Jct,MRI-CGCM3,rcp85,high,2015,0.0
140,Delta Jct,MRI-CGCM3,rcp85,high,2016,0.0
141,Delta Jct,MRI-CGCM3,rcp85,high,2017,0.0
142,Delta Jct,MRI-CGCM3,rcp85,high,2018,0.0
