# UK Gas Demand Forecasting â€“ Production Pipeline

This notebook implements a clean, repeatable forecasting pipeline
using real UK gas demand and weather data. It reuses validated logic
from the exploration notebook without ad-hoc analysis.


In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error


In [2]:
# Load demand
demand = pd.read_csv(
    "../data/raw/uk_gas_demand_daily.csv",
    parse_dates=["Applicable For", "Generated Time"],
    dayfirst=True
)

demand = demand[demand["Data Item"] == "Demand Actual, NTS, D+6"]
demand = (
    demand.sort_values("Generated Time")
          .groupby("Applicable For", as_index=False)
          .last()
)

demand = demand.rename(columns={"Applicable For": "date", "Value": "demand_mscm"})
demand["demand_gwh"] = demand["demand_mscm"] * 11.078
demand = demand[["date", "demand_gwh"]]


In [3]:
# Load weather
weather = pd.read_csv(
    "../data/raw/uk_temperature_daily.csv",
    sep=r"\s+",
    engine="python"
)

weather = weather.rename(columns={"Date": "date", "Value": "mean_temp"})
weather["date"] = pd.to_datetime(weather["date"])


In [4]:
# Merge
df = demand.merge(weather, on="date", how="inner")


In [5]:
# HDD
BASE_TEMP = 15.5
df["hdd"] = (BASE_TEMP - df["mean_temp"]).clip(lower=0)


In [6]:
# Features
df["demand_lag_1"] = df["demand_gwh"].shift(1)
df["demand_lag_7"] = df["demand_gwh"].shift(7)
df["demand_roll_7"] = df["demand_gwh"].rolling(7).mean()

df_model = df.dropna().copy()


In [7]:
# Baseline
baseline_pred = df_model["demand_lag_1"]
mae_baseline = mean_absolute_error(df_model["demand_gwh"], baseline_pred)
mae_baseline


140.19242163436712

In [8]:
# Linear Regression
features = ["hdd", "demand_lag_1", "demand_lag_7", "demand_roll_7"]
X = df_model[features]
y = df_model["demand_gwh"]

lr = LinearRegression()
lr.fit(X, y)

lr_pred = lr.predict(X)
mae_lr = mean_absolute_error(y, lr_pred)
mae_lr


129.48417560393793

In [9]:
pd.Series(lr.coef_, index=features).sort_values()


demand_lag_7      0.010318
demand_roll_7     0.233519
demand_lag_1      0.601713
hdd              26.232435
dtype: float64