In [1]:

import numpy as np
import pandas as pd
import torch
import gym  # use Gym for compatibility with SKRL's wrappers
from gym.vector import SyncVectorEnv
from gym.spaces import Box
from sklearn.preprocessing import StandardScaler

# SKRL imports
from skrl.envs.torch import wrap_env
from skrl.agents.torch.sac import SAC_DEFAULT_CONFIG, SAC
from skrl.memories.torch import RandomMemory
from skrl.trainers.torch import SequentialTrainer
from skrl.models.torch import Model, GaussianMixin, DeterministicMixin
from skrl.resources.preprocessors.torch import RunningStandardScaler
import matplotlib.pyplot as plt
import warnings
import tqdm
warnings.filterwarnings('ignore')




In [2]:
import torch.nn as nn

In [3]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class TimeSeriesPredictionEnv(gym.Env):
    """Last 6 columns are targets, rest are features."""
    def __init__(self, data: pd.DataFrame, train_ratio: float = 0.8):
        super().__init__()
        self.data = data.reset_index(drop=True)
        self.scaler = StandardScaler()
        # Feature/target split
        self.n_targets = 6
        self.feature_cols = data.columns[:-self.n_targets].tolist()
        self.target_cols = data.columns[-self.n_targets:].tolist()
        # Train/test split
        train_size = int(len(data) * train_ratio)
        train_df = data.iloc[:train_size]
        test_df = data.iloc[train_size:]
        # Fit & transform
        X_train, X_test = train_df[self.feature_cols], test_df[self.feature_cols]
        self.scaler.fit(X_train)
        self.scaled_train_X = self.scaler.transform(X_train)
        self.scaled_test_X  = self.scaler.transform(X_test)
        self.train_y, self.test_y = train_df[self.target_cols].values, test_df[self.target_cols].values
        # Spaces
        obs_dim = len(self.feature_cols)
        self.observation_space = Box(low=-20.0, high=20.0, shape=(obs_dim,), dtype=np.float32)
        self.action_space      = Box(low=-20.0, high=20.0, shape=(self.n_targets,), dtype=np.float32)
        # RNG and initial mode
        self._np_random = None
        self.set_mode(is_training=True)

    @property
    def np_random(self):
        if self._np_random is None:
            self._np_random = np.random.RandomState()
        return self._np_random

    def seed(self, seed=None):
        self._np_random = np.random.RandomState(seed)
        return [seed]

    def set_mode(self, is_training=True):
        self.is_training = is_training
        self.current_x = self.scaled_train_X if is_training else self.scaled_test_X
        self.current_y = self.train_y       if is_training else self.test_y
        self.max_steps = len(self.current_x) - 1
        self.current_step = (self.np_random.randint(0, self.max_steps)
                             if is_training else 0)
        return self.current_x[self.current_step]

    def reset(self, seed=None, options=None):
        if seed is not None:
            self.seed(seed)
        self.current_step = (self.np_random.randint(0, self.max_steps)
                             if self.is_training else 0)
        obs = self.current_x[self.current_step]
        return obs, {"target": self.current_y[self.current_step].copy()}

    def step(self, action):
        pred, actual = action, self.current_y[self.current_step]
        print(f"Pred: {pred}, Actual: {actual}")
        reward = -np.mean((pred - actual)**2)
        self.current_step += 1
        done = self.current_step >= self.max_steps
        if done:
            self.current_step = (self.np_random.randint(0, self.max_steps)
                                 if self.is_training else 0)
        obs = self.current_x[self.current_step]
        return obs, reward, done, False, {"target": actual.copy()}

In [5]:
def make_env(data, train_ratio, seed=None):
    def _thunk():
        env = TimeSeriesPredictionEnv(data, train_ratio)
        if seed is not None:
            env.seed(seed)
        return env
    return _thunk

In [6]:
# Define the critic network (Q-function)
class Critic(DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device, clip_actions=False):
        Model.__init__(self, observation_space, action_space, device)
        DeterministicMixin.__init__(self, clip_actions)

        self.net = nn.Sequential(
            nn.Linear(self.num_observations + self.num_actions, 256),
            nn.Tanh(),
            nn.Linear(256, 256),
            nn.Tanh(),
            nn.Linear(256, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def compute(self, inputs, role):
        return self.net(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)), {}

In [7]:
# Define the actor network (policy)
class Actor(GaussianMixin, Model):
    def __init__(self, observation_space, action_space, device, clip_actions=False,
                 clip_log_std=True, min_log_std=-20, max_log_std=2):
        Model.__init__(self, observation_space, action_space, device)
        GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std)

        self.net = nn.Sequential(
            nn.Linear(self.num_observations, 256),
            nn.Tanh(),
            nn.Linear(256, 256),
            nn.Tanh(),
            nn.Linear(256, 128),
            nn.Tanh(),
            nn.Linear(128, self.num_actions)
        )
        self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions))

    def compute(self, inputs, role):
        return self.net(inputs["states"]), self.log_std_parameter, {}

In [8]:
def train_sac(env, timesteps: int = 100000) -> SAC:
    """Train a Soft Actor-Critic agent across multiple envs using SKRL."""
    env = wrap_env(env)
    # Instantiate models
    models = {
        "policy": Actor(env.observation_space, env.action_space, device),
        "critic_1": Critic(env.observation_space, env.action_space, device),
        "critic_2": Critic(env.observation_space, env.action_space, device),
        "target_critic_1": Critic(env.observation_space, env.action_space, device),
        "target_critic_2": Critic(env.observation_space, env.action_space, device)
    }
    for model in models.values():
        model.init_parameters(method_name="normal_", mean=0.0, std=0.1)
    # SAC configuration
    cfg = SAC_DEFAULT_CONFIG.copy()
    cfg.update({
        "gradient_steps": 1,
        "batch_size": 256,
        "random_timesteps": 1000,
        "learning_starts": 1000,
        "learn_entropy": True,
        "entropy_learning_rate": 3e-4,
        "actor_learning_rate": 3e-4,
        "critic_learning_rate": 3e-4,
        "state_preprocessor": RunningStandardScaler,
        "state_preprocessor_kwargs": {"size": env.observation_space, "device": device},
        "discount_factor": 0.99,
        "polyak": 0.005,
        "experiment": {"write_interval": 1000, "checkpoint_interval": 5000}
    })
    agent = SAC(
        models=models,
        memory=RandomMemory(memory_size=90000, num_envs=env.num_envs, device=device),
        cfg=cfg,
        observation_space=env.observation_space,
        action_space=env.action_space,
        device=device
    )
    # Trainer
    cfg_trainer = {"timesteps": timesteps, "headless": True}
    trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent)
    trainer.train()
    
    return agent

In [None]:
# Main execution
if __name__ == "__main__":
    # Set random seed for reproducibility

    # Load data (you can use nrows for testing with smaller dataset)
    data = pd.read_csv("X.csv")
    data = data[:400_000]
    

    print("Loading large dataset...")
    num_envs = 128
    train_ratio = 0.8
    # `data` is your pandas DataFrame
    env_fns = [make_env(data, train_ratio, seed=i) for i in range(num_envs)]
    env = SyncVectorEnv(env_fns)


    # Train agent
    print("Training SAC agent with skrl...")
    agent = train_sac(env, timesteps=1000_000)

    # Save the trained agent
    agent.save("sac_timeseries_agent_large.pt")


Loading large dataset...


[38;20m[skrl:INFO] Environment wrapper: 'auto' (class: gym.vector.vector_env.VectorEnv)[0m
[38;20m[skrl:INFO] Environment wrapper: Gym[0m


Training SAC agent with skrl...
100%|██████████| 1000/1000 [00:02<00:00, 380.60it/s]


In [10]:
# Option A: raw string with backslashes
agent.load(r"runs\25-04-27_03-32-27-656454_SAC\checkpoints\best_agent.pt")


In [18]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def test_sac(env, agent: SAC, episodes: int = 2) -> None:
    """Evaluate a trained SAC agent, computing average MSE, MAE, and R2."""
    if hasattr(env, 'envs'):
        for sub_env in env.envs:
            if hasattr(sub_env, 'set_mode'):
                sub_env.set_mode(is_training=False)

    env_wrapped = wrap_env(env)

    all_preds_rewards = []
    max_steps = 0
    if hasattr(env, 'envs') and len(env.envs) > 0:
        max_steps = env.envs[0].max_steps
    total_timesteps = episodes * env_wrapped.num_envs * max_steps
    step_counter = 0

    for ep in range(episodes):
        obs, _ = env_wrapped.reset()
        done = False
        while not done:
            with torch.no_grad():
                action = agent.act(obs, timestep=step_counter, timesteps=total_timesteps)[0]
            obs, reward, done, truncated, info = env_wrapped.step(action)

            # Collect predictions and targets
            
            done = done[0]

            step_counter += 1
            all_preds_rewards.append(reward)

    all_preds_rewards = [
        r.cpu().numpy() if torch.is_tensor(r) else r for r in all_preds_rewards
    ]
    all_preds_rewards = np.array(all_preds_rewards)
    all_preds_rewards = all_preds_rewards.reshape(episodes, env_wrapped.num_envs, max_steps, -1)
    all_preds_rewards = all_preds_rewards[:, :, :, 0]  # Select the first target only
    all_preds_rewards = all_preds_rewards.reshape(-1, all_preds_rewards.shape[-1])



In [None]:
print("\nEvaluating the agent on test data...")
test_sac(env, agent, episodes=2)

[38;20m[skrl:INFO] Environment wrapper: 'auto' (class: gym.vector.vector_env.VectorEnv)[0m
[38;20m[skrl:INFO] Environment wrapper: Gym[0m



Evaluating the agent on test data...


In [None]:
def visualize_predictions(agent, env, data, series_idx=0):
    """Visualize predictions for a specific series"""
    env = wrap_env(env)
    obs, _ = env.reset()

    with torch.no_grad():
        action = agent.act(obs, timestep=0, timesteps=0)[0]

    # Convert action to numpy and reshape
    action_np = action.cpu().numpy()
    predicted = action_np.reshape(env.unwrapped.prediction_horizon, env.unwrapped.n_series)

    # Inverse transform predictions
    predicted_original = env.unwrapped.scaler.inverse_transform(predicted)

    # Get actual values
    actual_idx_start = env.unwrapped.current_step + env.unwrapped.lookback_window
    actual_idx_end = actual_idx_start + env.unwrapped.prediction_horizon
    actual = data.iloc[actual_idx_start:actual_idx_end][env.unwrapped.series_cols].values

    # Plot
    plt.figure(figsize=(12, 6))
    plt.plot(range(env.unwrapped.prediction_horizon), actual[:, series_idx],
             label='Actual', marker='o')
    plt.plot(range(env.unwrapped.prediction_horizon), predicted_original[:, series_idx],
             label='Predicted', marker='x')
    plt.title(f'Series {series_idx + 1} - 4-hour Ahead Prediction')
    plt.xlabel('Minutes ahead')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
    # Visualize some predictions
print("\nVisualizing predictions...")
for i in range(min(3, env.n_series)):  # Visualize up to 3 series
    visualize_predictions(agent, env, data, series_idx=i)

IndentationError: expected an indented block after 'for' statement on line 3 (2354504556.py, line 4)