In [2]:
# Install the notebook's third-party dependencies with %pip.
# NOTE(review): no versions are pinned here, so a fresh install may resolve
# to different releases than the ones this notebook was written against.
%pip install seaborn[stats] numpy scikit-learn statsforecast ipywidgets
# Ray is installed from a development wheel URL. The wheel filename targets
# CPython 3.11 on manylinux x86_64 (cp311 / manylinux2014_x86_64) only.
%pip install "ray[all] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp311-cp311-manylinux2014_x86_64.whl"

Collecting ipywidgets
  Using cached ipywidgets-8.1.2-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.10 (from ipywidgets)
  Using cached widgetsnbextension-4.0.10-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.10 (from ipywidgets)
  Using cached jupyterlab_widgets-3.0.10-py3-none-any.whl.metadata (4.1 kB)
Using cached ipywidgets-8.1.2-py3-none-any.whl (139 kB)
Using cached jupyterlab_widgets-3.0.10-py3-none-any.whl (215 kB)
Using cached widgetsnbextension-4.0.10-py3-none-any.whl (2.3 MB)
Installing collected packages: widgetsnbextension, jupyterlab-widgets, ipywidgets
Successfully installed ipywidgets-8.1.2 jupyterlab-widgets-3.0.10 widgetsnbextension-4.0.10
Note: you may need to restart the kernel to use updated packages.
Collecting ray@ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp311-cp311-manylinux2014_x86_64.whl (from ray[all]@ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp311-cp311-many

In [1]:
import os
from typing import Dict, Callable

import numpy as np
import pandas as pd
import ray
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
from statsforecast import StatsForecast
from statsforecast.models import ETS, AutoARIMA

In [2]:
# Start Ray.
# ignore_reinit_error=True keeps this cell safe to re-run: without it, calling
# ray.init() a second time in the same kernel raises instead of reusing the
# already-running local instance.
ray.init(ignore_reinit_error=True)

# NOTE(review): in the original run this did not work out of the box -- the
# recorded output stops right after the usage-stats notice. TODO: document the
# actual failure and the workaround once it has been reproduced.

Usage stats collection is enabled by default for nightly wheels. To disable this, run the following command: `ray disable-usage-stats` before starting Ray. See https://docs.ray.io/en/master/cluster/usage-stats.html for more details.


: 

In [None]:
# Data loading
drive_path = "drive/MyDrive/Colab Data/AutoGluon/PJM_Load_hourly.csv"
local_path = "data/PJM_Load_hourly.csv"
test_path = "data/test_energy.csv"

# Column to forecast. Defined up front so the loader below and the forecasting
# function use the same name instead of repeating the literal.
label_column = 'PJM_Load_MW'


def load_series(path, value_column=label_column):
    """Read a CSV and return a one-column frame indexed by its 'Datetime' column.

    Args:
        path: Location of a CSV file containing a 'Datetime' column and
            ``value_column``.
        value_column: Name of the column to keep.

    Returns:
        A new DataFrame holding only ``value_column``, indexed by the parsed
        'Datetime' values. Row order is whatever the file contains.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If 'Datetime' or ``value_column`` is missing from the file.
    """
    frame = pd.read_csv(path)
    frame['Datetime'] = pd.to_datetime(frame['Datetime'])
    # Build a new frame rather than mutating in place, so re-running the cell
    # always starts from the file contents.
    return frame.set_index('Datetime')[[value_column]]


# Choose the correct path depending on the environment: prefer the Drive copy
# when it is present, otherwise fall back to the local data directory.
selected_path = drive_path if os.path.exists(drive_path) else local_path

# Load and prepare the training and test data (same column in both files)
train_df = load_series(selected_path)
test_df = load_series(test_path)

# Combine train and test data for cross-validation.
# TimeSeriesSplit splits purely by row position, so the rows must be in
# chronological order. A stable sort keeps train rows ahead of test rows when
# two rows share the same timestamp.
df = pd.concat([train_df, test_df]).sort_index(kind='mergesort')

In [None]:
# Registry of evaluation metrics, keyed by the label used in the results.
# Every entry is called as metric(y_true, y_pred) and yields a single score.
metrics: Dict[str, Callable[[np.ndarray, np.ndarray], float]] = dict(
    MSE=mean_squared_error,
    MAE=mean_absolute_error,
)

@ray.remote
def train_and_evaluate_fold(model, df, train_indices, test_indices, label_column, metrics, freq='D'):
    """Fit one model on a single cross-validation fold and score its forecast.

    Args:
        model: A statsforecast model instance. Its forecast column is looked
            up by the model's class name.
        df: The full dataset. Either a frame indexed by timestamp that
            contains ``label_column``, or a frame that already has the
            ``unique_id`` / ``ds`` / ``y`` columns StatsForecast works with.
        train_indices: Positional row indices of the training window.
        test_indices: Positional row indices of the evaluation window; its
            length is used as the forecast horizon.
        label_column: Name of the column holding the observed values.
        metrics: Mapping of metric name to a callable ``metric(y_true, y_pred)``.
        freq: Frequency string handed to StatsForecast.

    Returns:
        A dict mapping each metric name to its score. If fitting or scoring
        fails, the error is printed and every metric maps to ``None``, so one
        bad fold does not abort the whole run.
    """
    try:
        train_frame = df.iloc[train_indices]
        test_frame = df.iloc[test_indices]

        # StatsForecast expects long-format input: one row per observation
        # with a series id, a timestamp and a target column. A frame that is
        # merely indexed by timestamp is reshaped here. A frame that already
        # has those columns is passed through untouched.
        if {'unique_id', 'ds', 'y'}.issubset(train_frame.columns):
            train_long = train_frame
        else:
            train_long = pd.DataFrame({
                'unique_id': label_column,
                'ds': train_frame.index.to_numpy(),
                'y': train_frame[label_column].to_numpy(),
            })

        # Create the StatsForecast object and forecast the test horizon.
        # forecast() fits the model on the supplied training data first.
        statsforecast = StatsForecast(models=[model], freq=freq)
        forecast = statsforecast.forecast(df=train_long, h=len(test_indices))

        # Compare plain arrays so the differing indexes of the two frames
        # play no part in the metric computation.
        y_true = test_frame[label_column].to_numpy()
        y_pred = forecast[model.__class__.__name__].to_numpy()
        return {
            metric_name: metric(y_true, y_pred)
            for metric_name, metric in metrics.items()
        }
    except Exception as e:
        print(f"An exception occurred: {e}")
        # In case the model fit or eval fails, return None for all metrics.
        return {metric_name: None for metric_name in metrics}



In [None]:
# Define the model(s) you want to use
# NOTE(review): season_length=12 looks like a monthly-data default. The input
# file name says the data is hourly, so a daily cycle would be 24 -- confirm
# before relying on the ETS scores.
models = [ETS(season_length=12), AutoARIMA()]  # Adjust models as needed

# Define the cross-validation strategy
tscv = TimeSeriesSplit(n_splits=5)  # 5-fold cross-validation

# The input file is hourly ("PJM_Load_hourly.csv"), so pass an hourly
# frequency explicitly instead of relying on the task's default of 'D'.
freq = 'h'

try:
    # Put the frame in Ray's object store once. Passing the reference lets
    # every task share that single copy instead of re-serialising the frame
    # for each (fold, model) pair.
    df_ref = ray.put(df)

    # Train and evaluate each fold, remembering which fold/model every task
    # belongs to so the scores can be labelled afterwards.
    task_labels = []
    results = []
    for fold, (train_indices, test_indices) in enumerate(tscv.split(df)):
        for model in models:
            task_labels.append({'fold': fold, 'model': type(model).__name__})
            results.append(train_and_evaluate_fold.remote(
                model, df_ref, train_indices, test_indices, label_column, metrics,
                freq=freq,
            ))

    # Retrieve and print results (ray.get preserves submission order, so the
    # labels and the scores line up one-to-one).
    metrics_results = ray.get(results)
    summary = pd.DataFrame(
        [{**label, **scores} for label, scores in zip(task_labels, metrics_results)]
    )
    print(summary)
finally:
    # Shutdown Ray even if a task or the result collection failed, so no
    # local Ray instance is left running behind the notebook.
    ray.shutdown()