## Monterey Match Window Grid Search

In [1]:
# Google Colab only: uncomment the lines below to change into the `content`
# directory, delete any existing `openet` checkout, clone the repository
# again, and change into the fresh clone.
# %cd ~/../content
# !rm -rf openet

# !git clone https://github.com/aetriusgx/openet.git
# %cd openet

# Preparation

In [2]:
from matplotlib.colors import LinearSegmentedColormap
from datetime import datetime, timedelta
from notebook_utils import calculate_metrics, eval_metrics, timeseries_rel, trim_extremes
from pathlib import Path
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import contextily as cx
import geopandas as gpd
import json
import pandas as pd
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns

In [3]:
# Styling cell: seaborn theme first, then the shared font sizes for every figure.
sns.set_theme(context="notebook", style="whitegrid")

# Three font-size tiers reused by the rc settings below.
SMALL_SIZE = 18
MEDIUM_SIZE = 24
BIGGER_SIZE = 28

# Apply all font-size overrides in one update (same keys that plt.rc would set).
plt.rcParams.update({
    'font.size': SMALL_SIZE,          # default text size
    'axes.titlesize': SMALL_SIZE,     # axes title
    'axes.labelsize': MEDIUM_SIZE,    # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,    # x tick labels
    'ytick.labelsize': SMALL_SIZE,    # y tick labels
    'legend.fontsize': SMALL_SIZE,    # legend text
    'figure.titlesize': BIGGER_SIZE,  # figure title
})

## Data Tables

### Historical

In [4]:
# Load the historical table and parse its 'time' column into datetimes.
poly_historical = (
    pd.read_csv('../data/monterey_window_historical.csv', low_memory=False)
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)
# Summarise columns, dtypes and non-null counts.
poly_historical.info()

FileNotFoundError: [Errno 2] No such file or directory: '../data/monterey_window_historical.csv'

In [None]:
# Latest timestamp present in the historical table.
poly_historical.loc[:, 'time'].max()

### Forecasting Table

In [5]:
# Gather current forecast data for the county.
# Each CSV in the sampled directory is read once; the forecasting date and the
# match window are taken from the file name and stored as extra columns.
forecast_dir = Path("../data/forecasts/match_sample/0.0.3/polygon/monterey/sampled")
files = forecast_dir.glob("*.csv")

# Collect the per-file frames in a list and concatenate once at the end;
# concatenating inside the loop re-copies the accumulated table every iteration.
forecast_frames = []
for file in files:
    # splits into [$date, 'window', $match_window, 'forecast.csv']
    parts = file.name.split("_")
    data = pd.read_csv(file, low_memory=False)
    data["forecasting_date"] = parts[0]
    # NOTE: the match window is kept as the string taken from the file name.
    data["match_window"] = parts[2]
    forecast_frames.append(data)

# Fail with a clear message instead of a KeyError on the empty table below.
if not forecast_frames:
    raise FileNotFoundError(f"No forecast CSV files found in {forecast_dir}")

# Reversed so the row order matches the previous behaviour, where each newly
# read file was placed in front of the rows gathered so far.
match_unaligned_table = pd.concat(forecast_frames[::-1], ignore_index=True)

match_unaligned_table['forecasting_date'] = pd.to_datetime(match_unaligned_table['forecasting_date'])
match_unaligned_table['time'] = pd.to_datetime(match_unaligned_table['time'])
match_unaligned_table.head()

Unnamed: 0,field_id,crop,time,expected_et,expected_eto,expected_etof,forecasting_date,match_window
0,CA_244148,69,2024-01-01,0.705,0.918,0.768,2024-07-08,180
1,CA_244148,69,2024-01-02,0.353,0.459,0.77,2024-07-08,180
2,CA_244148,69,2024-01-03,0.708,0.918,0.771,2024-07-08,180
3,CA_244148,69,2024-01-04,1.491,1.928,0.773,2024-07-08,180
4,CA_244148,69,2024-01-05,1.067,1.377,0.775,2024-07-08,180


In [6]:
# Count distinct fields and distinct dates per (forecasting date, match window)
# to check that all 50 fields are present in every combination.
field_time_coverage = (
    match_unaligned_table
    .groupby(['forecasting_date', 'match_window'])[['field_id', 'time']]
    .nunique()
)
field_time_coverage

Unnamed: 0_level_0,Unnamed: 1_level_0,field_id,time
forecasting_date,match_window,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-06-03,180,50,366
2024-06-03,60,50,366
2024-06-03,90,50,366
2024-06-10,180,50,366
2024-06-10,60,50,366
2024-06-10,90,50,366
2024-06-17,180,50,366
2024-06-17,60,50,366
2024-06-17,90,50,366
2024-06-24,180,50,366


### Full Table Merge

In [None]:
# Keep only the 2024 rows of the historical table.
is_2024 = poly_historical['time'].dt.year == 2024

# Right-merge so every forecast row is kept, then round-trip through the index
# to move the identifying columns to the front of the frame.
match_dt = (
    poly_historical.loc[is_2024, :]
    .merge(match_unaligned_table, on=['field_id', 'time', 'crop'], how='right')
    .set_index(['forecasting_date', 'field_id', 'crop', 'time', 'match_window'])
    .reset_index()
)
match_dt

## Error Metric Calculation

### Field Metrics

In [None]:
# Select forecast rows strictly after the forecasting date, strictly before
# forecasting date + 7 days, and strictly before the analysis end date.
# NOTE(review): analysis_end_date, poly_climatology and poly_avgs are not defined
# in the cells above — confirm they exist before this cell runs on a fresh kernel.
forecast_issued = match_unaligned_table['forecasting_date']
forecast_target = match_unaligned_table['time']
in_eval_window = (
    (forecast_target > forecast_issued)
    & (forecast_target < forecast_issued + timedelta(days=7))
    & (forecast_target < analysis_end_date)
)

# Evaluate metrics per (forecasting date, match window) group.
window_metrics_unaligned = (
    match_unaligned_table[in_eval_window]
    .groupby(['forecasting_date', 'match_window'])[list(match_unaligned_table.columns)]
    .apply(eval_metrics, normalize=True, climatology_ref=poly_climatology, avgs_ref=poly_avgs)
)

# Persist the metrics with the index levels written out as regular columns.
window_metrics_unaligned.reset_index().to_csv('../data/metrics/monterey_window_poly_metrics.csv', index=False)

In [None]:
# Reload the saved metrics from disk.
# NOTE(review): 'level_1' is presumably an unnamed index level written out by
# reset_index() when the metrics were saved — confirm the column name in the CSV.
window_metrics_unaligned = pd.read_csv('../data/metrics/monterey_window_poly_metrics.csv').drop(columns='level_1')
# Preview the table that was just loaded (the cell previously referenced the
# undefined name `window_metrics`, which raises NameError on a fresh kernel).
window_metrics_unaligned.head()