# Time Series Forecasting with PyCaret
This notebook demonstrates two basic univariate forecasting workflows on `DailyDelhiClimateTrain.csv` using PyCaret:
- Univariate forecasting without exogenous variables
- Univariate forecasting with exogenous variables

References: PyCaret Time Series Tutorial

Steps:
1) Load and prepare the data
2) Univariate forecasting (no exog)
3) Univariate forecasting with exogenous variables
4) Save models and preview forecasts

In [4]:
!pip install --quiet pycaret

Collecting pycaret
  Downloading pycaret-3.3.2-py3-none-any.whl.metadata (17 kB)
Collecting pandas<2.2.0 (from pycaret)
  Downloading pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scipy<=1.11.4,>=1.6.1 (from pycaret)
  Downloading scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib<1.4,>=1.2.0 (from pycaret)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting scikit-learn>1.4.0 (from pycaret)
  Downloading scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting pyod>=1.1.3 (from pycaret)
  Downloading pyod-2.0.5-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.3/46.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting deprecation

In [5]:
# Imports
import pandas as pd
import numpy as np

In [6]:
# Load and prepare the dataset
df = pd.read_csv('/kaggle/input/daily-climate-time-series-data/DailyDelhiClimateTrain.csv', parse_dates=['date'])
df = df.sort_values('date').reset_index(drop=True)
df.head()

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2013-01-01,10.0,84.5,0.0,1015.666667
1,2013-01-02,7.4,92.0,2.98,1017.8
2,2013-01-03,7.166667,87.0,4.633333,1018.666667
3,2013-01-04,8.666667,71.333333,1.233333,1017.166667
4,2013-01-05,6.0,86.833333,3.7,1016.5


In [10]:
df.shape

(1462, 5)

In [11]:
df = df.loc[:500]

In [19]:
df.shape

(501, 5)

In [13]:
# Univariate forecasting WITHOUT exogenous variables
from pycaret.time_series import TSForecastingExperiment
y = df.set_index('date')['meantemp'].asfreq('D')
y = y.sort_index()
ts_no_exog = TSForecastingExperiment()
ts_no_exog.setup(data=y, fh=14, fold=3, session_id=123)
best_no_exog = ts_no_exog.compare_models()
final_no_exog = ts_no_exog.finalize_model(best_no_exog)
forecast_no_exog = ts_no_exog.predict_model(final_no_exog, fh=14)
forecast_no_exog.head()

Unnamed: 0,Description,Value
0,session_id,123
1,Target,meantemp
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(501, 1)"
5,Transformed data shape,"(501, 1)"
6,Transformed train set shape,"(487, 1)"
7,Transformed test set shape,"(14, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.6298,1.4382,2.161,2.5031,0.0749,0.0779,-0.9815,7.1667
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.7005,1.6239,2.2555,2.8257,0.0759,0.0813,-1.3772,0.3367
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.7312,1.5495,2.2957,2.6967,0.0792,0.0833,-1.2894,0.1633
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,1.8244,1.5966,2.4202,2.7783,0.0822,0.0873,-1.2879,0.71
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.9768,1.738,2.6194,3.0268,0.0942,0.0993,-3.1098,0.3067
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.9874,1.7624,2.6339,3.0682,0.0923,0.0983,-2.4864,0.1867
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,2.1881,1.9052,2.9032,3.3151,0.0978,0.1062,-2.2385,0.1567
exp_smooth,Exponential Smoothing,2.2111,1.9202,2.9324,3.3423,0.1005,0.1085,-2.5908,0.0333
ets,ETS,2.2112,1.9203,2.9326,3.3426,0.1005,0.1085,-2.5914,0.0367
auto_arima,Auto ARIMA,2.2354,1.939,2.9639,3.3753,0.1023,0.11,-2.7937,0.7867


Unnamed: 0,y_pred
2014-05-17,31.155
2014-05-18,31.1518
2014-05-19,31.1487
2014-05-20,31.1455
2014-05-21,31.1424


In [14]:
# Save the univariate (no exog) model
ts_no_exog.save_model(final_no_exog, 'best_ts_univariate_no_exog')

Transformation Pipeline and Model Successfully Saved


(ForecastingPipeline(steps=[('forecaster',
                             TransformedTargetForecaster(steps=[('model',
                                                                 ForecastingPipeline(steps=[('forecaster',
                                                                                             TransformedTargetForecaster(steps=[('model',
                                                                                                                                 BaseCdsDtForecaster(fe_target_rr=[WindowSummarizer(lag_feature={'lag': [1]},
                                                                                                                                                                                    n_jobs=1)],
                                                                                                                                                     regressor=LGBMRegressor(n_jobs=-1, random_state=123),
                                     

In [20]:
df

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2013-01-01,10.000000,84.500000,0.000000,1015.666667
1,2013-01-02,7.400000,92.000000,2.980000,1017.800000
2,2013-01-03,7.166667,87.000000,4.633333,1018.666667
3,2013-01-04,8.666667,71.333333,1.233333,1017.166667
4,2013-01-05,6.000000,86.833333,3.700000,1016.500000
...,...,...,...,...,...
496,2014-05-12,28.000000,53.375000,10.887500,1004.875000
497,2014-05-13,25.250000,65.750000,9.962500,1008.375000
498,2014-05-14,26.875000,55.250000,8.575000,1006.500000
499,2014-05-15,29.500000,39.875000,11.125000,1004.250000


In [26]:
# Univariate forecasting WITH exogenous variables
# Use humidity, wind_speed, and meanpressure as exogenous regressors
exog_cols = ['humidity', 'wind_speed', 'meanpressure']
df_exog = df.set_index('date')[['meantemp'] + exog_cols].asfreq('D')
df_exog = df_exog.sort_index()
ts_with_exog = TSForecastingExperiment()
ts_with_exog.setup(data=df_exog, target='meantemp', fh=14, fold=3, session_id=123)
best_with_exog = ts_with_exog.compare_models()

Unnamed: 0,Description,Value
0,session_id,123
1,Target,meantemp
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(501, 4)"
5,Transformed data shape,"(501, 4)"
6,Transformed train set shape,"(487, 4)"
7,Transformed test set shape,"(14, 4)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
auto_arima,Auto ARIMA,1.0452,0.9557,1.3843,1.6644,0.0502,0.0519,-0.3445,5.16
arima,ARIMA,1.0932,0.9935,1.448,1.7301,0.0523,0.054,-0.3766,0.3
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.3118,1.1569,1.7379,2.0141,0.0635,0.0637,-0.8232,0.1667
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.695,1.4566,2.248,2.5344,0.0765,0.0811,-1.0357,0.2
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.8946,1.6486,2.51,2.8707,0.0911,0.0967,-2.6645,0.15
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.9953,1.6608,2.6451,2.8907,0.0925,0.098,-1.817,0.23
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,1.999,1.6368,2.649,2.8505,0.0957,0.1015,-2.5708,0.1467
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.9997,1.6373,2.65,2.8514,0.0957,0.1015,-2.5729,0.1467
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,2.0093,1.6439,2.6627,2.8628,0.0962,0.1021,-2.6012,0.1533
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,2.0367,1.7596,2.699,3.0637,0.0961,0.1019,-2.7637,0.1567


In [28]:
final_with_exog = ts_with_exog.finalize_model(best_with_exog)

In [29]:
# Save the univariate (with exog) model
ts_with_exog.save_model(final_with_exog, 'best_ts_univariate_with_exog')

Transformation Pipeline and Model Successfully Saved


(ForecastingPipeline(steps=[('forecaster',
                             TransformedTargetForecaster(steps=[('model',
                                                                 ForecastingPipeline(steps=[('forecaster',
                                                                                             TransformedTargetForecaster(steps=[('model',
                                                                                                                                 AutoARIMA(random_state=123,
 'best_ts_univariate_with_exog.pkl')