# Time Series Forecasting & Backtesting - Volve Production

This notebook demonstrates:
- Baseline vs. improved forecasting models
- Rolling-origin backtesting methodology
- Model performance comparison
- Production forecasts for the Volve field

In [None]:
# Setup
import sys
from pathlib import Path

# Put the parent of the current working directory at the front of the import
# path so the `src.*` imports below resolve. This has to run before them.
# NOTE(review): assumes the kernel's working directory is a direct child of the
# project root (e.g. a notebooks/ folder) — confirm for your launch setup.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Project-local helpers: data loading/aggregation, forecasting models,
# backtest evaluation, and shared configuration.
from src.data_prep import load_processed_data, aggregate_total_production
from src.forecasting import (
    forecast_series, 
    seasonal_naive_forecast, 
    exponential_smoothing_forecast,
    get_historical_with_forecast
)
from src.evaluation import (
    rolling_origin_backtest,
    compute_backtest_metrics,
    evaluate_models,
    save_metrics
)
from src.config import PROCESSED_DATA_DIR

# Show floats in DataFrame output with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

## 1. Load Data

In [None]:
# Load the processed dataset and report its size and date coverage
df = load_processed_data()

record_count = len(df)
wellbore_count = df['wellbore'].nunique()
print(f"Loaded {record_count} records for {wellbore_count} wellbores")

first_date = df['date'].min()
last_date = df['date'].max()
print(f"Date range: {first_date} to {last_date}")

In [None]:
# Build the total production frame and order it chronologically
total_df = aggregate_total_production(df).sort_values('date')

# Date-indexed oil series that the forecasting models consume
oil_series = total_df.set_index('date')['oil']

series_length = len(oil_series)
print(f"\nTotal production series: {series_length} months")

In [None]:
# Visualize the series: total oil production over time.
# The y-axis label carries the unit symbol Sm³ (superscript three).
fig = px.line(
    total_df,
    x='date',
    y='oil',
    title='Total Oil Production - Historical Data',
    labels={'oil': 'Oil (Sm³)', 'date': 'Date'},
)
fig.show()

## 2. Forecasting Models

### Model 1: Seasonal Naive (Baseline)
Predicts using the value from the same month in the previous year.

### Model 2: Exponential Smoothing (ETS)
Holt-Winters method that captures trend and seasonality.

In [None]:
# Forecast horizon used for the model forecasts (6 months ahead)
horizon = 6

# Baseline model: seasonal naive forecast of the oil series
baseline_forecast = seasonal_naive_forecast(
    oil_series,
    horizon=horizon,
)

print("Baseline (Seasonal Naive) Forecast:")
baseline_forecast

In [None]:
# Improved model: exponential smoothing over the same series and horizon
ets_forecast = exponential_smoothing_forecast(
    oil_series,
    horizon=horizon,
)

print("ETS Forecast:")
ets_forecast

In [None]:
# Plot forecasts comparison: history plus both model forecasts on one figure
fig = go.Figure()

# Historical
fig.add_trace(go.Scatter(
    x=total_df['date'], y=total_df['oil'],
    mode='lines', name='Historical',
    line=dict(color='#2E86AB', width=2)
))

# Baseline forecast
fig.add_trace(go.Scatter(
    x=baseline_forecast['date'], y=baseline_forecast['yhat'],
    mode='lines+markers', name='Baseline (Seasonal Naive)',
    line=dict(color='#F18F01', width=2, dash='dash')
))

# ETS forecast
fig.add_trace(go.Scatter(
    x=ets_forecast['date'], y=ets_forecast['yhat'],
    mode='lines+markers', name='ETS',
    line=dict(color='#E94F37', width=2, dash='dot')
))

# ETS confidence interval.
# The band reads both bound columns, so draw it only when both are present;
# checking just one of them would raise KeyError if the other is missing.
# NOTE(review): the "95%" in the legend presumes the ETS helper returns a 95%
# interval — confirm against exponential_smoothing_forecast.
if {'yhat_lower', 'yhat_upper'}.issubset(ets_forecast.columns):
    # Trace the upper bound left-to-right, then the lower bound right-to-left,
    # so 'toself' fills the closed polygon between them.
    band_x = pd.concat([ets_forecast['date'], ets_forecast['date'][::-1]])
    band_y = pd.concat([ets_forecast['yhat_upper'], ets_forecast['yhat_lower'][::-1]])
    fig.add_trace(go.Scatter(
        x=band_x,
        y=band_y,
        fill='toself', fillcolor='rgba(233, 79, 55, 0.2)',
        line=dict(color='rgba(255,255,255,0)'),
        name='ETS 95% CI'
    ))

# The y-axis label carries the unit symbol Sm³ (superscript three).
fig.update_layout(
    title='Forecast Comparison: Baseline vs ETS',
    xaxis_title='Date', yaxis_title='Oil (Sm³)',
    legend=dict(yanchor='top', y=0.99, xanchor='left', x=0.01)
)
fig.show()

## 3. Rolling-Origin Backtesting

To properly evaluate forecast accuracy, we use rolling-origin backtesting:
1. Train on data up to time t
2. Forecast for time t+1
3. Compare to actual value
4. Move forward and repeat

In [None]:
# Run backtest for baseline model: settings gathered in one place, then applied
baseline_backtest_params = dict(
    target_col='oil',
    series_id='TOTAL',
    model='baseline',
    test_periods=12,
    forecast_horizon=1,
)
backtest_baseline = rolling_origin_backtest(df, **baseline_backtest_params)

baseline_point_count = len(backtest_baseline)
print(f"Baseline backtest: {baseline_point_count} test points")
backtest_baseline.head()

In [None]:
# Run backtest for ETS model: settings gathered in one place, then applied
ets_backtest_params = dict(
    target_col='oil',
    series_id='TOTAL',
    model='ets',
    test_periods=12,
    forecast_horizon=1,
)
backtest_ets = rolling_origin_backtest(df, **ets_backtest_params)

ets_point_count = len(backtest_ets)
print(f"ETS backtest: {ets_point_count} test points")
backtest_ets.head()

In [None]:
# Visualize backtest results: actuals against each model's predictions
fig = go.Figure()

# Actual values (taken from the baseline backtest frame)
fig.add_trace(go.Scatter(
    x=backtest_baseline['date'], y=backtest_baseline['actual'],
    mode='lines+markers', name='Actual',
    line=dict(color='#2E86AB', width=2)
))

# Baseline predictions
fig.add_trace(go.Scatter(
    x=backtest_baseline['date'], y=backtest_baseline['predicted'],
    mode='lines+markers', name='Baseline Predicted',
    line=dict(color='#F18F01', width=2, dash='dash')
))

# ETS predictions
fig.add_trace(go.Scatter(
    x=backtest_ets['date'], y=backtest_ets['predicted'],
    mode='lines+markers', name='ETS Predicted',
    line=dict(color='#E94F37', width=2, dash='dot')
))

# The y-axis label carries the unit symbol Sm³ (superscript three).
fig.update_layout(
    title='Backtest Results: Actual vs Predicted',
    xaxis_title='Date', yaxis_title='Oil (Sm³)'
)
fig.show()

## 4. Model Performance Metrics

In [None]:
# Compute metrics for each backtest
metrics_baseline = compute_backtest_metrics(backtest_baseline)
metrics_ets = compute_backtest_metrics(backtest_ets)

# One row per model: a display label followed by that model's metric values
labelled_metrics = [
    ('Baseline (Seasonal Naive)', metrics_baseline),
    ('ETS (Exponential Smoothing)', metrics_ets),
]
metrics_comparison = pd.DataFrame(
    [{'Model': label, **scores} for label, scores in labelled_metrics]
)

metrics_comparison

In [None]:
# Visualize metrics: one bar subplot per error metric, baseline next to ETS
fig = make_subplots(rows=1, cols=3, subplot_titles=['MAE', 'MAPE (%)', 'RMSE'])

models = ['Baseline', 'ETS']
colors = ['#F18F01', '#E94F37']

# Subplot columns are 1-based and follow the order of the subplot titles
for col_idx, metric_key in enumerate(['mae', 'mape', 'rmse'], start=1):
    fig.add_trace(
        go.Bar(
            x=models,
            y=[metrics_baseline[metric_key], metrics_ets[metric_key]],
            marker_color=colors,
        ),
        row=1,
        col=col_idx,
    )

fig.update_layout(title='Model Performance Comparison', showlegend=False, height=400)
fig.show()

## 5. Wellbore-Level Forecasts

In [None]:
# Evaluate models for all wellbores
# series_ids=None selects every series (all wellbores + TOTAL)
all_metrics = evaluate_models(
    df, target_col='oil', series_ids=None, test_periods=6, forecast_horizon=1
)

# Pivot for comparison: series as rows, (metric, model) as columns
metrics_pivot = pd.pivot_table(
    all_metrics,
    index='series_id',
    columns='model',
    values=['mae', 'mape'],
).round(2)

metrics_pivot

In [None]:
# Persist the per-series metrics
save_metrics(all_metrics)

# NOTE(review): the reported location presumes save_metrics writes
# metrics.json under PROCESSED_DATA_DIR — confirm against src.evaluation.
metrics_path = PROCESSED_DATA_DIR / 'metrics.json'
print(f"Metrics saved to: {metrics_path}")

## 6. Generate Final Forecasts

In [None]:
# Generate 6-month forecast using best model (ETS)
final_forecast_params = {
    'target_col': 'oil',
    'series_id': 'TOTAL',
    'model': 'ets',
    'horizon': 6,
}
final_forecast = forecast_series(df, **final_forecast_params)

print("Final 6-Month Forecast (Total Field):")
final_forecast

In [None]:
# Final visualization: history for TOTAL plus the final forecast
historical, forecast = get_historical_with_forecast(df, final_forecast, 'TOTAL', 'oil')

fig = go.Figure()

# Historical production
fig.add_trace(go.Scatter(
    x=historical['date'], y=historical['oil'],
    mode='lines', name='Historical',
    line=dict(color='#2E86AB', width=2)
))

# Point forecast
fig.add_trace(go.Scatter(
    x=forecast['date'], y=forecast['yhat'],
    mode='lines+markers', name='Forecast',
    line=dict(color='#E94F37', width=2, dash='dash'),
    marker=dict(size=10)
))

# Interval band.
# The band reads both bound columns, so draw it only when both are present;
# checking just one of them would raise KeyError if the other is missing.
# NOTE(review): the "95%" in the legend presumes the forecast helper returns a
# 95% interval — confirm against forecast_series.
if {'yhat_lower', 'yhat_upper'}.issubset(forecast.columns):
    # Trace the upper bound left-to-right, then the lower bound right-to-left,
    # so 'toself' fills the closed polygon between them.
    band_x = pd.concat([forecast['date'], forecast['date'][::-1]])
    band_y = pd.concat([forecast['yhat_upper'], forecast['yhat_lower'][::-1]])
    fig.add_trace(go.Scatter(
        x=band_x,
        y=band_y,
        fill='toself', fillcolor='rgba(233, 79, 55, 0.2)',
        line=dict(color='rgba(255,255,255,0)'),
        name='95% Confidence Interval'
    ))

# The y-axis label carries the unit symbol Sm³ (superscript three).
fig.update_layout(
    title='Volve Field Oil Production Forecast',
    xaxis_title='Date', yaxis_title='Oil (Sm³)',
    legend=dict(yanchor='top', y=0.99, xanchor='left', x=0.01)
)
fig.show()

## 7. Summary

### Model Performance
| Model | MAE | MAPE | RMSE |
|-------|-----|------|------|
| Baseline (Seasonal Naive) | See above | See above | See above |
| ETS (Exponential Smoothing) | See above | See above | See above |

### Key Findings
1. **ETS outperforms baseline** - Captures declining trend better than seasonal naive
2. **Production decline** - Forecasts confirm continued decline in field production
3. **Uncertainty grows** - Confidence intervals widen for longer horizons

### Recommendations
- Use ETS model for operational forecasting
- Monitor forecast accuracy monthly and retrain
- Consider adding external factors (maintenance schedules, reservoir pressure) for improved accuracy

### Next Steps
- Run the Streamlit dashboard for interactive exploration
- Set up Power Automate for automated reporting
- Review email_summary.txt for stakeholder communication