In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VAR
from statsmodels.tools.eval_measures import rmse

# ---- Load and clean your data here ----
# Sample dummy data for demonstration:
# Replace this with your own loading and preprocessing logic.
# A seeded generator keeps the demo numbers identical on every fresh run.
rng = np.random.default_rng(42)
df = pd.DataFrame({
    "DateTime": pd.date_range(start='1/1/2005', periods=20, freq='D'),
    "Data1": rng.random(20),
    "Data2": rng.random(20)
})

# Set DateTime index (rebinding the name instead of mutating in place)
df = df.set_index("DateTime")

# Use only numeric columns and drop any rows with NaNs (or use fillna() if
# appropriate). Chaining returns a fresh frame, so nothing derived from `df`
# is modified in place.
df_numeric = df.select_dtypes(include=[np.number]).dropna()

# An index built from a column carries no frequency, even when the dates are
# evenly spaced. Restore it when the spacing is regular so the model does not
# have to guess it.
# NOTE(review): presumably this is what triggered the `_init_dates` warning in
# this cell's earlier output - confirm after re-running.
if (
    isinstance(df_numeric.index, pd.DatetimeIndex)
    and df_numeric.index.freq is None
    and len(df_numeric) >= 3  # infer_freq needs at least three timestamps
):
    inferred_freq = pd.infer_freq(df_numeric.index)
    if inferred_freq is not None:
        # The index is already regular at this frequency, so asfreq() only
        # attaches the frequency; it does not add or remove rows.
        df_numeric = df_numeric.asfreq(inferred_freq)

# ---- Splitting ----
total_rows = len(df_numeric)
print("Total rows:", total_rows)

if total_rows <= 2:
    print("❌ Not enough data to proceed with VAR model. Please add more rows.")
else:
    # Hold out the last rows for evaluation (at most 5), always leaving at
    # least one row for training.
    n_obs = min(5, total_rows - 1)
    train, test = df_numeric.iloc[:-n_obs], df_numeric.iloc[-n_obs:]

    print("✅ Train shape:", train.shape)
    print("✅ Test shape:", test.shape)

    # Determine safe maxlags based on training data size: the 30% heuristic,
    # never above 15, and additionally capped so the largest candidate model
    # stays estimable for this many observations and series.
    # NOTE(review): the cap is meant to mirror the limit select_order()
    # enforces for a constant-trend model - confirm against the installed
    # statsmodels version.
    n_series = train.shape[1]
    max_estimable = (len(train) - n_series - 1) // (n_series + 1)
    maxlags = max(1, min(15, int(len(train) * 0.3), max_estimable))
    print(f"Using maxlags = {maxlags}")

    try:
        # Fit VAR model
        model = VAR(train)

        # Select optimal lag order
        lag_order_result = model.select_order(maxlags=maxlags)
        print("\nSelected Lag Order:\n", lag_order_result.summary())

        # AIC may pick order 0 (common for noise-like series). With 0,
        # `train.values[-0:]` would select the whole array instead of the
        # last rows and the model would have no lagged terms to forecast
        # from, so fall back to a single lag.
        best_lag = int(lag_order_result.aic)
        if best_lag < 1:
            print("AIC selected lag 0; falling back to lag 1 so the forecast has lagged inputs.")
            best_lag = 1
        model_fitted = model.fit(best_lag)

        # Forecasting: seed the recursion with the last `best_lag` training rows
        forecast_input = train.values[-best_lag:]
        forecast = model_fitted.forecast(y=forecast_input, steps=n_obs)
        forecast_df = pd.DataFrame(forecast, index=test.index, columns=test.columns)

        print("\nForecasted Values:\n", forecast_df)

        # Calculate RMSE per series against the held-out rows
        for col in test.columns:
            error = rmse(test[col], forecast_df[col])
            print(f"RMSE for {col}: {error:.4f}")

    except Exception as e:
        # Deliberate best-effort: report the failure instead of aborting the
        # notebook run.
        print(f"\n❌ Error: {e}")
        print("Check if your training data has enough rows and no NaN/infinite values.")


Total rows: 20
✅ Train shape: (15, 2)
✅ Test shape: (5, 2)
Using maxlags = 4

Selected Lag Order:
  VAR Order Selection (* highlights the minimums) 
      AIC         BIC         FPE         HQIC   
-------------------------------------------------
0      -6.369      -6.296    0.001716      -6.414
1      -6.209      -5.992    0.002069      -6.346
2      -6.084      -5.723    0.002629      -6.313
3      -7.680      -7.173   0.0007340      -7.999
4     -8.550*     -7.899*  0.0007339*     -8.960*
-------------------------------------------------

Forecasted Values:
                Data1     Data2
DateTime                      
2005-01-16  0.264943  0.326774
2005-01-17  0.280224  0.357588
2005-01-18  0.318066  0.636870
2005-01-19  0.322111  0.377805
2005-01-20  0.389478  0.428071
RMSE for Data1: 0.4831
RMSE for Data2: 0.3524


  self._init_dates(dates, freq)
