# Run the Analog Forecast

Use this notebook to run the analog forecast for any date that is not present in the historical dataset, i.e., when you want to make an actual forecast, or a hindcast for some date in 2022.

In [8]:
import pandas as pd
import xarray as xr
from scripts.download_era5 import download
from config import data_dir
import luts
import analog_forecast as af

Run this mega cell to make a forecast; supply the desired dates at the top:

In [74]:
%%time
varname = "t2m"
spatial_domain = "alaska"
ref_date = pd.to_datetime("2023-02-25")
use_anom = True
forecast_date = "2023-03-03"

bbox = list(luts.spatial_domains[spatial_domain]["bbox"])
# ERA5 at CDS API needs [N, W, S, E]
bbox = [bbox[-1]] + bbox[:3] 

cretrieve_kwargs = {   
    "product_type": "reanalysis",
    "format": "netcdf",
    # only going through 2022 right now to avoid getting "expver=5" (initial release data)
    "year": ref_date.year,
    "month": str(ref_date.month).zfill(2),
    "day": str(ref_date.day).zfill(2),
    "time": "12:00",
    "area": bbox,
}

out_paths = download(
    data_dir,
    dataset=luts.varnames_lu[varname]["era5_dataset_name"],
    varnames=luts.varnames_lu[varname]["era5_long_name"],
    cretrieve_kwargs=cretrieve_kwargs,
    fn_suffix=f"{ref_date.strftime('%Y%m%d')}"
)
ref_fp = out_paths[0]

ref_da = xr.load_dataset(ref_fp)[varname]

sub_da = af.read_subset_era5(spatial_domain, data_dir, varname, use_anom=use_anom)
# need to load and subset raw data as wel for forecast with anomaly based search
if use_anom:
    raw_sub_da = af.read_subset_era5(spatial_domain, data_dir, varname, use_anom=False)
    # compute the climatology from the raw data for the day of year of the reference date
    clim_da = raw_sub_da.sel(time=raw_sub_da.time[sub_da.dayofyear == ref_date.dayofyear]).mean(dim="time")
    ref_anom_da = ref_da - clim_da
    # append this to the sub_da
    sub_da = xr.concat([sub_da.drop("dayofyear"), ref_anom_da], dim="time")
else:
    raw_sub_da = sub_da
    
analogs = af.find_analogs(sub_da, ref_date.strftime('%Y-%m-%d'), print_analogs=True)
forecast = af.make_forecast(raw_sub_da, analogs.time.values, ref_date.strftime('%Y-%m-%d')) - 273.15
print(
    f"\nForecast in C for {forecast_date}:",
    forecast.sel(
        latitude=58.3005, longitude=-134.4201, method="nearest"
    ).sel(time=forecast_date).round(2).values[0]
    end="\n"
)

                                                                                                                                        

   Top 5 Analogs: 
Rank 1:   Date: 1969-04-14;  RMSE: 3.073
Rank 2:   Date: 2017-09-14;  RMSE: 3.088
Rank 3:   Date: 2001-04-09;  RMSE: 3.144
Rank 4:   Date: 2009-03-01;  RMSE: 3.176
Rank 5:   Date: 1984-01-19;  RMSE: 3.188
Forecast in C for 2023-03-03 [-3.67]
CPU times: user 5.22 s, sys: 8.37 s, total: 13.6 s
Wall time: 16.1 s


end