# Pure Forecast Testing Notebook (Matched to Forecasting Model)

This notebook tests the streamway depth forecasting model using **purely forecast precipitation data**.

**Comparison Setup:**
This notebook is aligned with `forecasting_model.ipynb` to ensure comparability:
- **Target**: Change in depth over a **4-hour horizon** (`depth[t+4h] - depth[t]`).
- **Features**: 
    - Past Rain (Lags 2-7h)
    - Future Rain (Forecast 0-3h)
    - **No** `prev_depth` feature (to match the baseline model).
- **Method**: Direct forecasting (predicting T+4h state from T).

In [24]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os

plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

## 1. Load Streamway Data

In [25]:
# --- LOAD AND UPDATE STREAMWAY DATA ---
# We load the existing CSV and fetch any new data from the ThingSpeak API to keep it up to date.

import os
import requests
import pandas as pd

csv_file = 'streamway_data.csv'
df_list = []

# Column mapping for renaming - only keeping field1 (streamway depth)
column_mapping = {
    'field1': 'streamway_depth_mm',
}

# 1. Load Existing Data
if os.path.exists(csv_file):
    print("Loading existing data from CSV...")
    df_existing = pd.read_csv(csv_file, index_col=0, parse_dates=True)
    # Ensure existing data is timezone-naive
    if df_existing.index.tz is not None:
        df_existing.index = df_existing.index.tz_localize(None)
    
    latest_timestamp = df_existing.index.max()
    print(f"Latest data in CSV: {latest_timestamp}")
    
    # Start fetching from the latest timestamp in the CSV
    current_end = pd.to_datetime('now').tz_localize(None)
    end_date = latest_timestamp
    
    if current_end > end_date:
        print(f"Fetching new data from {current_end} back to {end_date}")
        fetch_new_data = True
    else:
        print("No new data to fetch")
        fetch_new_data = False
        df_streamway = df_existing
else:
    print("No existing CSV found, fetching all data...")
    df_existing = None
    # Default start date if no CSV exists (e.g. 1 year ago or specific date)
    end_date = pd.to_datetime('2024-05-17 13:00:00') 
    current_end = pd.to_datetime('now').tz_localize(None)
    fetch_new_data = True

# 2. Fetch New Data (if needed)
batch_count = 0
if fetch_new_data:
    try:
        while current_end > end_date:
            batch_count += 1
            # Fetch 8000 results ending at current_end
            url = f'https://thingspeak.mathworks.com/channels/2574933/fields/1.json?end={current_end.strftime("%Y-%m-%d %H:%M:%S")}&results=8000'
            response = requests.get(url)
            data = response.json()
            
            if 'feeds' not in data or not data['feeds']:
                print("No more data returned from API")
                break
                
            df_batch = pd.DataFrame(data['feeds'])
            
            # Keep only the columns we need
            df_batch = df_batch[['entry_id', 'created_at', 'field1']].copy()
            
            df_list.append(df_batch)
            
            # Get time range for this batch
            batch_start = pd.to_datetime(df_batch['created_at'].iloc[0]).tz_localize(None)
            batch_end = pd.to_datetime(df_batch['created_at'].iloc[-1]).tz_localize(None)
            
            current_end = batch_start - pd.Timedelta(seconds=1)
            
            print(f"Batch {batch_count}: {len(df_batch)} rows, {batch_start} to {batch_end}")
            
            if batch_count > 50:
                print("Reached maximum batch limit (50). Stopping fetch.")
                break
                
    except Exception as e:
        print(f"Error fetching data: {e}")
        print("Continuing with available data...")

    # 3. Process and Merge
    if df_list:
        df_new = pd.concat(df_list, ignore_index=True)
        df_new['created_at'] = pd.to_datetime(df_new['created_at']).dt.tz_localize(None)
        df_new.set_index('created_at', inplace=True)
        df_new.sort_index(inplace=True)
        
        # Rename columns
        df_new.rename(columns=column_mapping, inplace=True)
        
        # Combine with existing
        if df_existing is not None:
            # Filter new data to only keep what's newer than existing
            latest_existing = df_existing.index.max()
            df_new = df_new[df_new.index > latest_existing]
            
            if not df_new.empty:
                df_streamway = pd.concat([df_existing, df_new]).sort_index()
                # Remove duplicates
                df_streamway = df_streamway[~df_streamway.index.duplicated(keep='first')]
                print(f"Combined: {len(df_existing)} existing + {len(df_new)} new = {len(df_streamway)} total")
                
                # Save to CSV
                df_streamway.to_csv(csv_file)
                print(f"Updated data saved to {csv_file}")
            else:
                print("No new unique data found after filtering.")
                df_streamway = df_existing
        else:
            df_streamway = df_new
            df_streamway.to_csv(csv_file)
            print(f"New data saved to {csv_file}")
    else:
        if df_existing is not None:
            df_streamway = df_existing
            print("No new data fetched.")
        else:
            print("Error: No data available (neither CSV nor API).")
            df_streamway = pd.DataFrame(columns=['streamway_depth_mm'])

# 4. Ensure Numeric and Resample
if not df_streamway.empty:
    # FORCE NUMERIC CONVERSION
    # The API returns strings, and if mixed with float in CSV, it causes object dtype
    df_streamway['streamway_depth_mm'] = pd.to_numeric(df_streamway['streamway_depth_mm'], errors='coerce')
    
    # Drop NaN values that might have resulted from conversion errors
    df_streamway = df_streamway.dropna(subset=['streamway_depth_mm'])
    
    # Resample to 10min intervals
    df_streamway = df_streamway.resample('10min').mean().interpolate(method='time')
    print(f"Resampled data: {len(df_streamway)} rows (10min intervals)")
    print(f"Range: {df_streamway.index.min()} to {df_streamway.index.max()}")


Loading existing data from CSV...
Latest data in CSV: 2025-11-21 17:10:19
Fetching new data from 2025-11-21 17:20:34.637759 back to 2025-11-21 17:10:19
Batch 1: 8000 rows, 2025-10-19 08:20:47 to 2025-11-21 17:20:19
Combined: 104377 existing + 1 new = 104378 total
Updated data saved to streamway_data.csv
Resampled data: 79726 rows (10min intervals)
Range: 2024-05-17 01:50:00 to 2025-11-21 17:20:00


### analyze the change rate of the streamway depth

In [26]:
# --- DEPTH CHANGE DISTRIBUTION ANALYSIS ---
# We analyze how much the streamway depth typically changes over our forecast horizon (4 hours).
# This gives us a baseline for "expected volatility" and helps set reasonable confidence intervals.

# Calculate 4-hour change (24 steps of 10 mins)
horizon_steps = 4 * 6
df_streamway['depth_change_4h'] = df_streamway['streamway_depth_mm'].diff(horizon_steps)

# Calculate Statistics
change_stats = df_streamway['depth_change_4h'].describe()
std_dev_4h = df_streamway['depth_change_4h'].std()
percentiles = df_streamway['depth_change_4h'].quantile([0.01, 0.05, 0.95, 0.99])

print("Statistics for 4-Hour Depth Changes:")
print(f"Mean Change: {change_stats['mean']:.2f} mm")
print(f"Std Dev: {std_dev_4h:.2f} mm")
print(f"Max Rise (4h): {change_stats['max']:.2f} mm")
print(f"Max Drop (4h): {change_stats['min']:.2f} mm")
print("\nPercentiles:")
print(percentiles)

# Plot Distribution
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=df_streamway['depth_change_4h'].dropna(), 
    nbinsx=100, 
    name='4h Change',
    marker_color='teal',
    opacity=0.7
))

# Add lines for Std Dev
fig.add_vline(x=std_dev_4h, line_dash="dash", line_color="orange", annotation_text="+1 Std Dev")
fig.add_vline(x=-std_dev_4h, line_dash="dash", line_color="orange", annotation_text="-1 Std Dev")
fig.add_vline(x=2*std_dev_4h, line_dash="dot", line_color="red", annotation_text="+2 Std Dev")
fig.add_vline(x=-2*std_dev_4h, line_dash="dot", line_color="red", annotation_text="-2 Std Dev")

fig.update_layout(
    title='Distribution of 4-Hour Depth Changes',
    xaxis_title='Change in Depth (mm)',
    yaxis_title='Frequency',
    template='plotly_white',
    bargap=0.1,
    height=500
)
fig.show()

Statistics for 4-Hour Depth Changes:
Mean Change: 0.01 mm
Std Dev: 118.77 mm
Max Rise (4h): 1511.00 mm
Max Drop (4h): -1176.00 mm

Percentiles:
0.01   -315.0
0.05    -94.0
0.95    104.0
0.99    527.0
Name: depth_change_4h, dtype: float64


## 2. Fetch Forecast Data

In [27]:
# Configuration
latitude = 51.8258112
longitude = -3.6611301
timezone = "auto"

# 1. Fetch Historical Forecast Data
start_date = df_streamway.index.min().strftime('%Y-%m-%d')
end_date = (pd.Timestamp.now() - pd.Timedelta(days=2)).strftime('%Y-%m-%d')

print(f"Fetching historical forecast data from {start_date} to {end_date}...")

hist_url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
hist_params = {
    "latitude": latitude,
    "longitude": longitude,
    "start_date": start_date,
    "end_date": end_date,
    "hourly": "precipitation_probability,precipitation",
    "timezone": timezone
}

try:
    response_hist = requests.get(hist_url, params=hist_params)
    data_hist = response_hist.json()
    hourly_hist = data_hist['hourly']
    df_hist = pd.DataFrame({
        'time': pd.to_datetime(hourly_hist['time']),
        'precip_forecast': hourly_hist['precipitation'],
        'precip_prob': hourly_hist['precipitation_probability']
    })
    df_hist.set_index('time', inplace=True)
except Exception as e:
    print(f"Error fetching historical: {e}")
    df_hist = pd.DataFrame()

# 2. Fetch Recent/Live Forecast Data
print("Fetching recent/live forecast data...")
live_url = "https://api.open-meteo.com/v1/forecast"
live_params = {
    "latitude": latitude,
    "longitude": longitude,
    "hourly": "precipitation,precipitation_probability",
    "past_days": 7,
    "forecast_days": 7,
    "timezone": timezone
}

try:
    response_live = requests.get(live_url, params=live_params)
    data_live = response_live.json()
    hourly_live = data_live['hourly']
    df_live = pd.DataFrame({
        'time': pd.to_datetime(hourly_live['time']),
        'precip_forecast': hourly_live['precipitation'],
        'precip_prob': hourly_live['precipitation_probability']
    })
    df_live.set_index('time', inplace=True)
except Exception as e:
    print(f"Error fetching live: {e}")
    df_live = pd.DataFrame()

# Combine
if not df_hist.empty and not df_live.empty:
    df_forecast_combined = pd.concat([df_hist, df_live])
    df_forecast_combined = df_forecast_combined[~df_forecast_combined.index.duplicated(keep='last')]
    df_forecast_combined = df_forecast_combined.sort_index()
    print(f"Combined forecast data range: {df_forecast_combined.index.min()} to {df_forecast_combined.index.max()}")
else:
    df_forecast_combined = df_hist if not df_hist.empty else df_live

Fetching historical forecast data from 2024-05-17 to 2025-11-19...
Fetching recent/live forecast data...
Combined forecast data range: 2024-05-17 00:00:00 to 2025-11-27 23:00:00


# fetch historical weather data to compare against forecast data
## we want to be sure that the forecast data is accurate

In [28]:
# lets pull in open-meteo data for the same time period and location
print(f"Channel location: lat={latitude}, lon={longitude}")

# 1. Fetch Historical Forecast Data
start_date = df_streamway.index.min().strftime('%Y-%m-%d')
end_date = (pd.Timestamp.now() - pd.Timedelta(days=2)).strftime('%Y-%m-%d')

# Define the API endpoint and parameters
url = "https://archive-api.open-meteo.com/v1/archive"

print(f"Fetching weather data from {start_date} to {end_date}")

params = {
    "latitude": latitude,
    "longitude": longitude,
    "hourly": "precipitation",
    "start_date": start_date,
    "end_date": end_date,
    "timezone": "auto"
}

# Make the API request
response = requests.get(url, params=params)

# Check if the request was successful
if response.status_code == 200:
    weather_data = response.json()
    print("Weather data retrieved successfully.")
else:
    print(f"Error retrieving weather data: {response.status_code}")

# Convert weather data to DataFrame
weather_df = pd.DataFrame({
    'time': pd.to_datetime(weather_data['hourly']['time']),
    'precipitation_mm': weather_data['hourly']['precipitation']
})
weather_df.set_index('time', inplace=True)

Channel location: lat=51.8258112, lon=-3.6611301
Fetching weather data from 2024-05-17 to 2025-11-19
Weather data retrieved successfully.


In [29]:
# --- RESIDUAL ANALYSIS ---
# Compare Forecast Precipitation vs Actual Historical Precipitation

print("## Residual Analysis: Forecast vs Actual Precipitation")

# weather_df already loaded in previous cell from Open-Meteo Archive
# df_forecast_combined contains the forecast data

# 1. Prepare Actual Rainfall (Hourly sum)
df_actual_hourly = weather_df['precipitation_mm'].resample('1h').sum()

# 2. Prepare Forecast Rainfall (Hourly sum)
df_forecast_hourly = df_forecast_combined['precip_forecast'].resample('1h').sum()

# 3. Align Data (Inner join to compare only overlapping periods)
comparison_df = pd.DataFrame({'actual': df_actual_hourly, 'forecast': df_forecast_hourly}).dropna()

if not comparison_df.empty:
    # 4. Calculate Residuals (Forecast - Actual)
    comparison_df['residual'] = comparison_df['forecast'] - comparison_df['actual']
    
    print(f"Comparison Period: {comparison_df.index.min()} to {comparison_df.index.max()}")
    print(f"Number of hours compared: {len(comparison_df)}")
    print(f"Mean Residual: {comparison_df['residual'].mean():.4f} mm/hr (Positive = Over-forecast)")
    print(f"MAE: {comparison_df['residual'].abs().mean():.4f} mm/hr")
    print(f"RMSE: {np.sqrt((comparison_df['residual']**2).mean()):.4f} mm/hr")
    
    
    # 5. Plotting with Plotly
    fig = make_subplots(
        rows=2, cols=2,
        specs=[[{"colspan": 2}, None], [{}, {}]],
        subplot_titles=("Actual (Archive) vs Forecast Precipitation (Hourly Sum)", "Residual Distribution", "Actual vs Forecast Scatter"),
        vertical_spacing=0.15
    )

    # Time Series
    fig.add_trace(
        go.Scatter(x=comparison_df.index, y=comparison_df['actual'], name='Actual Rain', line=dict(color='blue', width=1), opacity=0.6),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=comparison_df.index, y=comparison_df['forecast'], name='Forecast Rain', line=dict(color='orange', width=1, dash='dash'), opacity=0.6),
        row=1, col=1
    )

    # Histogram
    fig.add_trace(
        go.Histogram(x=comparison_df['residual'], nbinsx=50, name='Residuals', marker_color='purple', opacity=0.7),
        row=2, col=1
    )
    # Vertical line at 0 for histogram
    fig.add_vline(x=0, line_width=2, line_dash="dash", line_color="red", row=2, col=1)

    # Scatter
    fig.add_trace(
        go.Scatter(x=comparison_df['actual'], y=comparison_df['forecast'], mode='markers', name='Scatter', marker=dict(size=5, opacity=0.3, color='green')),
        row=2, col=2
    )
    # Perfect fit line
    max_val = max(comparison_df['actual'].max(), comparison_df['forecast'].max())
    fig.add_trace(
        go.Scatter(x=[0, max_val], y=[0, max_val], mode='lines', name='Perfect Forecast', line=dict(color='red', dash='dash')),
        row=2, col=2
    )

    fig.update_layout(height=800, title_text="Residual Analysis", showlegend=True)
    fig.update_xaxes(title_text="Time", row=1, col=1)
    fig.update_yaxes(title_text="Precipitation (mm/hr)", row=1, col=1)
    fig.update_xaxes(title_text="Residual (mm/hr)", row=2, col=1)
    fig.update_yaxes(title_text="Frequency", row=2, col=1)
    fig.update_xaxes(title_text="Actual (mm/hr)", row=2, col=2)
    fig.update_yaxes(title_text="Forecast (mm/hr)", row=2, col=2)
    
    fig.show()

    # Check for bias
    bias = comparison_df['residual'].mean()
    if abs(bias) > 0.1:
        print(f"\nSignificant bias detected: {bias:.2f} mm/hr")
        if bias > 0:
            print("Forecast tends to OVER-predict rainfall.")
        else:
            print("Forecast tends to UNDER-predict rainfall.")
        print("Consider applying a correction factor to the forecast data.")
    else:
        print(f"\nBias is minimal ({bias:.4f} mm/hr). Forecast appears well-calibrated.")
else:
    print("No overlapping data found between actual and forecast rainfall.")

## Residual Analysis: Forecast vs Actual Precipitation
Comparison Period: 2024-05-17 00:00:00 to 2025-11-19 23:00:00
Number of hours compared: 13248
Mean Residual: 0.0557 mm/hr (Positive = Over-forecast)
MAE: 0.1854 mm/hr
RMSE: 0.6161 mm/hr



Bias is minimal (0.0557 mm/hr). Forecast appears well-calibrated.


## 3. Feature Engineering (Matched)

We replicate the feature engineering from `forecasting_model.ipynb`:
- **Lags**: 2h to 7h ago.
- **Forecasts**: 0h to 3h ahead (relative to prediction time).
- **Target**: Change in depth over 4 hours.

In [30]:
# Resample forecast to 10min (ffill)
df_forecast_10min = df_forecast_combined.resample('10min').ffill()

# Merge
df_merged = df_streamway.join(df_forecast_10min, how='outer')

# --- Feature Engineering ---
prediction_horizon_hours = 4
prediction_steps = prediction_horizon_hours * 6

# 1. Precipitation Lags (2h to 7h)
for h in range(2, 8):
    df_merged[f'precip_lag_{h}h'] = df_merged['precip_forecast'].shift(h * 6)

# 2. Forecast Precipitation (0h to 3h)
# Note: shift(-k) brings future data to current row.
for h in range(0, prediction_horizon_hours):
    df_merged[f'precip_forecast_{h}h'] = df_merged['precip_forecast'].shift(-h * 6)

# 3. Target: Change in Depth (T+4h - T)
df_merged['target_depth'] = df_merged['streamway_depth_mm'].shift(-prediction_steps)
df_merged['target_change'] = df_merged['target_depth'] - df_merged['streamway_depth_mm']

# Drop NaNs for training
# We need target_change (which is NaN at the end) and lags (NaN at start)
df_train_data = df_merged.dropna(subset=['target_change', 'precip_lag_7h'])

print(f"Training data shape: {df_train_data.shape}")

Training data shape: (79671, 17)


## 4. Model Training

In [52]:
features = [f'precip_lag_{h}h' for h in range(2, 8)] + \
           [f'precip_forecast_{h}h' for h in range(0, prediction_horizon_hours)]

target = 'target_change'

X = df_train_data[features]
y = df_train_data[target]

# Split
split_idx = int(len(X) * 0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

print(f"Train set: {X_train.shape}, Test set: {X_test.shape}")

# Train (Using params from forecasting_model.ipynb grid search if available, or defaults)
# Using the ones seen in the notebook inspection: max_depth=5, learning_rate=0.1, n_estimators=100

# Train (Using params from forecasting_model.ipynb grid search if available, or defaults)
# Using the ones seen in the notebook inspection: max_depth=5, learning_rate=0.1, n_estimators=100
model = xgb.XGBRegressor(n_estimators=50, learning_rate=0.1, max_depth=3, subsample=1.0, random_state=42, early_stopping_rounds=10, objective='reg:squarederror')
model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

# Evaluate
pred_change = model.predict(X_test)

# Reconstruct Absolute Depth
# Predicted Depth[T+4h] = Actual Depth[T] + Predicted Change[T->T+4h]
current_depth_test = df_train_data.loc[X_test.index, 'streamway_depth_mm']
pred_depth_reconstructed = current_depth_test + pred_change

y_actual_depth = df_train_data.loc[X_test.index, 'target_depth']

# Calculate Model Metrics
rmse = np.sqrt(mean_squared_error(y_actual_depth, pred_depth_reconstructed))
mae = mean_absolute_error(y_actual_depth, pred_depth_reconstructed)
r2 = r2_score(y_actual_depth, pred_depth_reconstructed)

# Calculate Baseline Metrics (Persistence: Predict 0 change)
# If we predicted 0 change, the error would be exactly the actual change
# Baseline RMSE = sqrt(mean(actual_change^2))
# Baseline MAE = mean(|actual_change|)
# We use the Test Set for a fair comparison
baseline_changes = df_train_data.loc[X_test.index, 'target_change']
baseline_rmse = np.sqrt((baseline_changes**2).mean())
baseline_mae = baseline_changes.abs().mean()

# Formatting Helper
def format_score(name, score, baseline):
    # Green if < 90% of baseline (solid improvement)
    # Orange if < 100% of baseline (marginal)
    # Red if >= baseline (worse/useless)
    
    ratio = score / baseline
    
    GREEN = "\033[92m"
    ORANGE = "\033[93m"
    RED = "\033[91m"
    RESET = "\033[0m"
    
    if ratio < 0.9:
        color = GREEN
        status = "GOOD"
    elif ratio < 1.0:
        color = ORANGE
        status = "BORDERLINE"
    else:
        color = RED
        status = "BAD"
        
    return f"{color}{name}: {score:.2f} (Baseline: {baseline:.2f}) [{status} - {ratio:.1%}] {RESET}"

print("--- Model Evaluation (vs Persistence Baseline) ---")
print(format_score("RMSE", rmse, baseline_rmse))
print(format_score("MAE ", mae, baseline_mae))
print(format_score("R2 ", r2, 1.0))

# Plot Test Results with Plotly
fig = go.Figure()

# Validity Time
validity_time = X_test.index + pd.Timedelta(hours=4)

fig.add_trace(go.Scatter(x=validity_time, y=y_actual_depth, name='Actual Depth', line=dict(color='blue'), opacity=0.7))
fig.add_trace(go.Scatter(x=validity_time, y=pred_depth_reconstructed, name='Predicted Depth (4h Lead)', line=dict(color='orange', dash='dash'), opacity=0.7))

fig.update_layout(
    title=f'Streamway Depth Forecast (4-Hour Horizon, Test Set) RMSE: {rmse:.2f}',
    xaxis_title='Time',
    yaxis_title='Depth (mm)',
    template='plotly_white',
    height=500
)
fig.show()


Train set: (63736, 10), Test set: (15935, 10)
--- Model Evaluation (vs Persistence Baseline) ---
[92mRMSE: 94.42 (Baseline: 118.82) [GOOD - 79.5%] [0m
[92mMAE : 45.98 (Baseline: 51.67) [GOOD - 89.0%] [0m
[92mR2 : 0.88 (Baseline: 1.00) [GOOD - 87.6%] [0m


In [42]:

    # --- HYPERPARAMETER TUNING & VALIDATION ---
    from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
    
    print("## Performing Hyperparameter Tuning (Grid Search with TimeSeriesSplit)")
    
    # Define the parameter grid
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 1.0],
        'early_stopping_rounds': [5, 10, 15]
    }
    
    # Use TimeSeriesSplit for validation to respect temporal order
    # 3 splits means we train on chunk 1, validate on 2; train on 1+2, validate on 3, etc.
    tscv = TimeSeriesSplit(n_splits=3)
    
    xgb_model = xgb.XGBRegressor(random_state=42)
    
    grid_search = GridSearchCV(
        estimator=xgb_model,
        param_grid=param_grid,
        cv=tscv,
        scoring='neg_mean_squared_error',
        verbose=1,
        n_jobs=-1
    )
    
    # Fit Grid Search
    # This might take a minute
    print(f"Starting Grid Search on {len(X_train)} training samples...")
    grid_search.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
    
    print(f"Best Parameters: {grid_search.best_params_}")
    print(f"Best RMSE (CV): {np.sqrt(-grid_search.best_score_):.4f}")
    
    # Update the 'model' variable to use the best estimator
    model = grid_search.best_estimator_
    
    # We can now proceed to evaluate this best model on the Test Set (which was held out completely)


## Performing Hyperparameter Tuning (Grid Search with TimeSeriesSplit)
Starting Grid Search on 63736 training samples...
Fitting 3 folds for each of 162 candidates, totalling 486 fits
Best Parameters: {'early_stopping_rounds': 10, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50, 'subsample': 1.0}
Best RMSE (CV): 109.5036


## 5. Recent & Future Forecast

We apply the model to recent data to see how it performs now.

In [43]:
# Define Range: Recent Past + Future
recent_start = pd.Timestamp.now() - pd.Timedelta(days=3)
future_end = pd.Timestamp.now() + pd.Timedelta(days=2)

# We need data that covers this range. 
# Note: To predict for time T (validity), we need input at T-4h.
# So we select inputs from recent_start-4h to future_end-4h.

input_start = recent_start - pd.Timedelta(hours=4)
input_end = future_end - pd.Timedelta(hours=4)

X_recent = df_merged.loc[input_start:input_end, features].dropna()

if not X_recent.empty:
    # Predict Change
    pred_change_recent = model.predict(X_recent)
    
    # Get Current Depth for these times
    # Note: For future times, we might NOT have 'streamway_depth_mm'.
    # This is a limitation of the 'Change' model - it needs a starting point.
    # For the 'Future' part (where we don't have current depth), we have to use the *Predicted* depth from 4 hours ago?
    # Or we simply say: "This model provides a 4-hour warning based on KNOWN current depth".
    # If we want to predict >4 hours into the future, we would need to chain predictions, which accumulates error.
    # For now, let's plot what we can: Predictions where we have a known starting depth.
    
    # Filter to where we have actual depth
    valid_indices = X_recent.index.intersection(df_streamway.index)
    
    if len(valid_indices) > 0:
        X_valid = X_recent.loc[valid_indices]
        pred_change_valid = model.predict(X_valid)
        current_depth_valid = df_merged.loc[valid_indices, 'streamway_depth_mm']
        
        pred_depth_valid = current_depth_valid + pred_change_valid
        validity_time_valid = valid_indices + pd.Timedelta(hours=4)
        
        
        
        
        # Calculate Confidence Intervals (95%)
        # We prefer using the Model's RMSE on the test set (prediction error).
        # If not available, we use the natural volatility of the stream (std_dev_4h) as a proxy for uncertainty.
        try:
            model_uncertainty = rmse
            uncertainty_source = "Model RMSE"
        except NameError:
            try:
                model_uncertainty = std_dev_4h
                uncertainty_source = "Stream Volatility (Std Dev)"
            except NameError:
                # Fallback if neither is defined (e.g. running out of order)
                model_uncertainty = 10.0 # Default fallback
                uncertainty_source = "Default"

        # 95% Confidence Interval
        confidence_interval = 1.96 * model_uncertainty

        upper_band = pred_depth_valid + confidence_interval
        lower_band = pred_depth_valid - confidence_interval

        # Plot with Plotly
        fig = go.Figure()

        # Confidence Bands (Upper Bound)
        fig.add_trace(go.Scatter(
        x=validity_time_valid,
        y=upper_band,
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        name='Upper Bound'
        ))

        # Confidence Bands (Lower Bound)
        fig.add_trace(go.Scatter(
        x=validity_time_valid,
        y=lower_band,
        mode='lines',
        line=dict(width=0),
        fill='tonexty', # Fill to the trace before it (Upper Bound)
        fillcolor='rgba(255, 0, 0, 0.2)',
        name='95% Confidence Interval'
        ))

        # Actual Depth
        fig.add_trace(go.Scatter(
        x=df_streamway.loc[recent_start:].index,
        y=df_streamway.loc[recent_start:]['streamway_depth_mm'],
        name='Actual Depth',
        line=dict(color='blue', width=2),
        opacity=0.6
        ))

        # Forecast
        fig.add_trace(go.Scatter(
        x=validity_time_valid,
        y=pred_depth_valid,
        name='Forecast (4h Lead)',
        line=dict(color='red', width=2, dash='dash')
        ))

        # Now Line
        fig.add_vline(x=pd.Timestamp.now().timestamp() * 1000, line_width=2, line_dash="dot", line_color="green", annotation_text="Now")

        fig.update_layout(
        title=f'Streamway Depth Forecast (4-Hour Warning) +/- {confidence_interval:.1f}mm ({uncertainty_source})',
        xaxis_title='Time',
        yaxis_title='Depth (mm)',
        template='plotly_white',
        height=500,
        hovermode='x unified'
        )

        fig.show()
else:
    print("No recent data found.")

In [44]:

# --- RECURSIVE FORECAST EVALUATION (Multi-Day) ---
# We evaluate the model's ability to forecast out to 3 days (72 hours) by accumulating 4-hour changes.
# Since our model only depends on precipitation features (which are available for the whole horizon),
# we don't need to feed predictions back as inputs. We just sum the predicted changes.

print("## Recursive Forecast Evaluation (up to 3 Days)")

def generate_recursive_forecast(start_time, start_depth, hours=72):
    # Generate timestamps: start, start+4h, ..., start+(hours-4)h
    # We predict change for [t, t+4h]
    timestamps = [start_time + pd.Timedelta(hours=4*i) for i in range(hours // 4)]
    
    # Get features for these timestamps
    # We need to ensure these timestamps exist in df_merged or we can interpolate/reindex
    # Since df_merged is 10min freq, exact matches should exist if start_time is aligned.
    
    # Align start_time to nearest 10min if needed
    start_time = start_time.round('10min')
    timestamps = [start_time + pd.Timedelta(hours=4*i) for i in range(hours // 4)]
    
    # Check if all timestamps are in df_merged
    valid_timestamps = [t for t in timestamps if t in df_merged.index]
    
    if not valid_timestamps:
        return None, None
        
    X_batch = df_merged.loc[valid_timestamps, features]
    
    if X_batch.isnull().values.any():
        # If features are missing (e.g. end of data), we can't predict further
        # We truncate to valid data
        X_batch = X_batch.dropna()
        
    if X_batch.empty:
        return None, None
        
    # Predict changes
    pred_changes = model.predict(X_batch)
    
    # Accumulate changes
    cum_changes = np.cumsum(pred_changes)
    
    # Calculate absolute depths
    pred_depths = start_depth + cum_changes
    
    # Result DataFrame
    forecast_times = [t + pd.Timedelta(hours=4) for t in X_batch.index]
    df_res = pd.DataFrame({
        'forecast_time': forecast_times,
        'pred_depth': pred_depths,
        'step_hours': [(i+1)*4 for i in range(len(pred_depths))]
    }).set_index('forecast_time')
    
    return df_res

# 1. Evaluate on Test Set (Sampled)
# We pick N random points in the test set where we have 72h of future data
eval_horizon_hours = 72
n_samples = 50

# Filter test set to ensure we have enough runway
test_end = df_train_data.index.max()
valid_starts = X_test.index[X_test.index < (test_end - pd.Timedelta(hours=eval_horizon_hours))]

if len(valid_starts) > n_samples:
    # Pick random samples or spaced samples
    # Let's pick spaced samples to cover different conditions
    indices = np.linspace(0, len(valid_starts)-1, n_samples, dtype=int)
    sample_starts = valid_starts[indices]
else:
    sample_starts = valid_starts
    
errors = []
examples_to_plot = []

print(f"Evaluating on {len(sample_starts)} samples from Test Set...")

for start_t in sample_starts:
    start_depth = df_train_data.loc[start_t, 'streamway_depth_mm']
    
    # Generate Forecast
    df_fc = generate_recursive_forecast(start_t, start_depth, hours=eval_horizon_hours)
    
    if df_fc is not None:
        # Get Actuals
        actuals = df_train_data.loc[df_fc.index, 'streamway_depth_mm']
        
        # Calculate Errors
        # Align indices
        common_idx = df_fc.index.intersection(actuals.index)
        if not common_idx.empty:
            df_compare = pd.DataFrame({
                'pred': df_fc.loc[common_idx, 'pred_depth'],
                'actual': actuals.loc[common_idx],
                'step_hours': df_fc.loc[common_idx, 'step_hours']
            })
            df_compare['error'] = df_compare['pred'] - df_compare['actual']
            df_compare['abs_error'] = df_compare['error'].abs()
            df_compare['baseline_abs_error'] = (start_depth - df_compare['actual']).abs()
            errors.append(df_compare)
            # Collect all for percentile selection
            examples_to_plot.append({'start': start_t, 'df': df_compare, 'mae': df_compare['abs_error'].mean()})
            
if errors:
    all_errors = pd.concat(errors)
    
    # Group by Horizon
    metrics_by_horizon = all_errors.groupby('step_hours')[['abs_error', 'baseline_abs_error']].agg(['mean', 'count'])
    # Flatten MultiIndex columns if needed or access correctly
    # Actually groupby on list returns MultiIndex columns (abs_error, mean), (abs_error, count), etc.
    # Let's simplify:
    metrics_by_horizon = all_errors.groupby('step_hours').agg({'abs_error': 'mean', 'baseline_abs_error': 'mean', 'error': lambda x: np.sqrt((x**2).mean())})
    metrics_by_horizon.rename(columns={'abs_error': 'MAE', 'baseline_abs_error': 'Baseline MAE', 'error': 'RMSE'}, inplace=True)
    # RMSE calculated in agg above
    
    print("\nPerformance by Horizon:")
    print(metrics_by_horizon[['MAE', 'RMSE', 'Baseline MAE']])
    
    # Plot Error Growth
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=metrics_by_horizon.index, 
        y=metrics_by_horizon['MAE'],
        mode='lines+markers',
        name='MAE'
    ))
    fig.add_trace(go.Scatter(
        x=metrics_by_horizon.index, 
        y=metrics_by_horizon['RMSE'],
        mode='lines+markers',
        name='RMSE'
    ))
    
    fig.add_trace(go.Scatter(
        x=metrics_by_horizon.index, 
        y=metrics_by_horizon['Baseline MAE'],
        mode='lines+markers',
        name='Baseline MAE (Persistence)',
        line=dict(dash='dash', color='gray')
    ))
fig.update_layout(
        title='Forecast Error Growth over Time (Recursive)',
        xaxis_title='Forecast Horizon (Hours)',
        yaxis_title='Error (mm)',
        template='plotly_white'
    )
fig.show()
    
# Plot Example Forecasts

if examples_to_plot:
    # Sort by MAE
    examples_to_plot.sort(key=lambda x: x['mae'])
    n = len(examples_to_plot)
    
    # Select 5th (Best), 10th (Good), 50th (Median), 90th (Bad), 95th (Worst) percentiles
    # 10th percentile is index n*0.1 (Low Error = Good)
    # 90th percentile is index n*0.9 (High Error = Bad)
    
    indices = [int(n*0.05), int(n*0.1), int(n*0.5), int(n*0.9), int(n*0.95)]
    selected_examples = [examples_to_plot[i] for i in indices if i < n]
    
    labels = ["5th Percentile (Best Case)", "10th Percentile (Good Case)", "50th Percentile (Median Case)", "90th Percentile (Bad Case)", "95th Percentile (Worst Case)"]
    
    print(f"\n## Visualizing Representative Forecasts (5th, 10th, 50th, 90th, 95th Percentiles of MAE)")
    for i, ex in enumerate(selected_examples):
        start_t = ex['start']
        df_comp = ex['df']
        mae = ex['mae']
        label = labels[i] if i < len(labels) else f"Example {i}"
    
        fig = go.Figure()
        
        # Context: 24h before
        # We need to access df_train_data. 
        # Ensure start_t is valid timestamp
        context_start = start_t - pd.Timedelta(hours=24)
        # We assume df_train_data is available
        if 'df_train_data' in locals():
            context_data = df_train_data.loc[context_start:start_t]
            fig.add_trace(go.Scatter(x=context_data.index, y=context_data['streamway_depth_mm'], name='History', line=dict(color='gray', width=1)))
        
        fig.add_trace(go.Scatter(x=df_comp.index, y=df_comp['actual'], name='Actual (Future)', line=dict(color='blue')))
        fig.add_trace(go.Scatter(x=df_comp.index, y=df_comp['pred'], name='Forecast (Recursive)', line=dict(color='red', dash='dash', width=2)))
        
        fig.add_vline(x=start_t.timestamp() * 1000, line_dash="dot", line_color="green", annotation_text="Start")
        
        fig.update_layout(
            title=f'{label} - MAE: {mae:.2f}mm (Start: {start_t})',
            xaxis_title='Time',
            yaxis_title='Depth (mm)',
            template='plotly_white',
            height=400
        )
        fig.show()
    
    

# --- DENSE RECURSIVE FORECAST (High Resolution) ---
# To get a smooth line, we run 24 parallel recursive chains, interleaved.
# Chain 0: Predicts T+4h, T+8h, T+12h... based on T
# Chain 1: Predicts T+4h+10m, T+8h+10m... based on T+10m
# ...
# Chain 23: Predicts T+7h50m... based on T+3h50m

def generate_dense_forecast(start_time, hours=72):
    all_preds = []

    # We need to ensure we have features for the entire future horizon
    # Horizon end = start_time + hours
    # We need features up to Horizon end - 4h

    # We iterate through the first 4-hour window of "targets"
    # Offsets: 10min, 20min, ..., 4h (24 steps)
    steps_per_horizon = 4 * 6 # 24 steps

    print(f"Generating dense forecast for {hours} hours ({hours*6} steps)...")

    for k in range(1, steps_per_horizon + 1):
        # The first target time for this chain
        first_target = start_time + pd.Timedelta(minutes=10*k)
        
        # The base time for this first target (4h ago)
        # This base time is in the PAST (or Now), so we have actual depth.
        base_time = first_target - pd.Timedelta(hours=4)
        
        # Get the starting depth from actuals
        # We use 'asof' or nearest to handle slight misalignments, but exact match expected
        if base_time in df_streamway.index:
            current_depth = df_streamway.loc[base_time, 'streamway_depth_mm']
        else:
            # Fallback: interpolate or nearest
            # Assuming df_streamway is sorted
            idx = df_streamway.index.get_indexer([base_time], method='nearest')[0]
            current_depth = df_streamway.iloc[idx]['streamway_depth_mm']
        
        # Now recurse forward for this chain
        # We want predictions at: first_target, first_target+4h, first_target+8h...
        # until we exceed start_time + hours
        
        chain_times = []
        current_t = first_target
        while current_t <= start_time + pd.Timedelta(hours=hours):
            chain_times.append(current_t)
            current_t += pd.Timedelta(hours=4)
        
        # Prepare features for this chain
        # We need features at (chain_time - 4h) for each chain_time
        feature_times = [t - pd.Timedelta(hours=4) for t in chain_times]
        
        # Check if feature times exist in df_merged
        valid_feature_times = [t for t in feature_times if t in df_merged.index]
        
        if not valid_feature_times:
            continue
            
        # Extract features
        X_batch = df_merged.loc[valid_feature_times, features]
        
        if X_batch.empty:
            continue
            
        # Predict Changes
        pred_changes = model.predict(X_batch)
        
        # Accumulate Changes
        # Depth[i] = Depth[i-1] + Change[i]
        # But here, Depth[0] = current_depth + Change[0]
        # Depth[1] = Depth[0] + Change[1] ...
        
        cum_changes = np.cumsum(pred_changes)
        chain_depths = current_depth + cum_changes
        
        # Store results
        for t, d in zip(valid_feature_times, chain_depths):
            # t is the feature time (base). The prediction is for t + 4h
            pred_time = t + pd.Timedelta(hours=4)
            all_preds.append({'forecast_time': pred_time, 'pred_depth': d})
            
    # Create DataFrame
    if not all_preds:
        return None
        
    df_res = pd.DataFrame(all_preds).sort_values('forecast_time').set_index('forecast_time')

    # Calculate 'step_hours' for confidence intervals
    # step_hours = (forecast_time - start_time) in hours
    df_res['step_hours'] = (df_res.index - start_time).total_seconds() / 3600

    return df_res

# --- EXECUTE DENSE FORECAST ---
print("\n## Future Forecast (Next 3 Days - Dense)")

now = pd.Timestamp.now().floor('10min')
last_valid_idx = df_streamway['streamway_depth_mm'].last_valid_index()
start_time = last_valid_idx

print(f"Starting Forecast from: {start_time}")

# Generate Dense Forecast
df_future_fc = generate_dense_forecast(start_time, hours=72)

if df_future_fc is not None:
    # Plot Future Forecast
    fig = go.Figure()

    # Recent History (last 2 days)
    history_start = start_time - pd.Timedelta(days=2)
    hist_data = df_streamway.loc[history_start:start_time]

fig.add_trace(go.Scatter(
    x=hist_data.index,
    y=hist_data['streamway_depth_mm'],
    name='History',
    line=dict(color='blue')
))

# Forecast
fig.add_trace(go.Scatter(
    x=df_future_fc.index,
    y=df_future_fc['pred_depth'],
    name='Forecast (Recursive)',
    line=dict(color='red', dash='dash', width=2)
))

# Confidence Bands
if errors:
    # Map horizon to RMSE using interpolation for dense steps
    # metrics_by_horizon has indices 4, 8, 12...
    # We need to interpolate for 4.16, 4.33...
    
    # Create interpolation function
    from scipy.interpolate import interp1d
    
    x_rmse = metrics_by_horizon.index.values
    y_rmse = metrics_by_horizon['RMSE'].values
    
    # Extend interpolation range to cover 0 and max
    x_rmse = np.insert(x_rmse, 0, 0)
    y_rmse = np.insert(y_rmse, 0, 0) # Assume 0 error at t=0
    
    f_rmse = interp1d(x_rmse, y_rmse, kind='linear', fill_value="extrapolate")
    
    uncertainties = f_rmse(df_future_fc['step_hours'])
    
    upper = df_future_fc['pred_depth'] + 1.96 * uncertainties
    lower = df_future_fc['pred_depth'] - 1.96 * uncertainties
    
    fig.add_trace(go.Scatter(
        x=df_future_fc.index, y=upper, mode='lines', line=dict(width=0), showlegend=False, name='Upper'
    ))
    fig.add_trace(go.Scatter(
        x=df_future_fc.index, y=lower, mode='lines', line=dict(width=0), fill='tonexty', fillcolor='rgba(255,0,0,0.1)', name='95% CI'
    ))
    
fig.add_vline(x=start_time.timestamp() * 1000, line_dash="dot", line_color="green", annotation_text="Start")

fig.update_layout(
    title='Streamway Depth Forecast (Next 72 Hours) - Dense',
    xaxis_title='Time',
    yaxis_title='Depth (mm)',
    template='plotly_white',
    height=600
)
fig.show()


## Recursive Forecast Evaluation (up to 3 Days)
Evaluating on 50 samples from Test Set...

Performance by Horizon:
                   MAE        RMSE  Baseline MAE
step_hours                                      
4            48.649784   91.291715     45.551617
8            79.111278  132.582091     83.851018
12           96.350249  158.881172    109.670419
16          112.015113  183.091965    101.976847
20          116.502595  195.094237    119.112465
24          131.359650  210.876697    139.608623
28          138.208999  223.453699    132.288024
32          158.097891  237.187918    125.420000
36          166.017440  258.277549    133.560000
40          172.813572  274.816197    186.920000
44          170.132591  265.867265    168.164762
48          179.608662  270.569917    160.352381
52          184.843712  274.785052    165.740000
56          182.140771  262.742939    145.360000
60          192.813241  282.341315    148.488024
64          203.187722  284.325841    164.248623
68 


## Visualizing Representative Forecasts (5th, 10th, 50th, 90th, 95th Percentiles of MAE)



## Future Forecast (Next 3 Days - Dense)
Starting Forecast from: 2025-11-21 17:20:00
Generating dense forecast for 72 hours (432 steps)...
