In [49]:
import gym
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from gym.spaces import Box
from sklearn.preprocessing import StandardScaler
from gym.vector import SyncVectorEnv


class TimeSeriesPredictionEnv(gym.Env):
    """Last 6 columns are targets, rest are features."""
    def __init__(self, data: pd.DataFrame, train_ratio: float = 0.8, forecast_horizon: int = 240):
        super().__init__()
        self.forecast_horizon = forecast_horizon
        self.data = data.reset_index(drop=True)
        self.scaler = StandardScaler()
        
        # Feature/target split  
        self.n_targets = 6
        self.feature_cols = data.columns[:-self.n_targets].tolist()
        self.target_cols = data.columns[-self.n_targets:].tolist()
        
        # Train/test split
        train_size = int(len(data) * train_ratio)
        train_df = data.iloc[:train_size]
        test_df = data.iloc[train_size:]
        
        # Fit & transform
        X_train, X_test = train_df[self.feature_cols], test_df[self.feature_cols]
        self.scaler.fit(X_train)
        self.scaled_train_X = self.scaler.transform(X_train)
        self.scaled_test_X = self.scaler.transform(X_test)
        self.train_y, self.test_y = train_df[self.target_cols].values, test_df[self.target_cols].values
        
        # Spaces
        obs_dim = len(self.feature_cols)
        self.observation_space = Box(low=-20.0, high=20.0, shape=(obs_dim,), dtype=np.float32)
        self.action_space = Box(low=-20.0, high=20.0, shape=(self.n_targets,), dtype=np.float32)
        
        # RNG and initial mode
        self._np_random = None
        self.set_mode(is_training=True)
    
    @property
    def np_random(self):
        if self._np_random is None:
            self._np_random = np.random.RandomState()
        return self._np_random
    
    def seed(self, seed=None):
        self._np_random = np.random.RandomState(seed)
        return [seed]
    
    def set_mode(self, is_training=True):
        self.is_training = is_training
        self.current_x = self.scaled_train_X if is_training else self.scaled_test_X
        self.current_y = self.train_y if is_training else self.test_y
        self.max_steps = len(self.current_x) - 1
        self.current_step = (self.np_random.randint(0, self.max_steps) 
                            if is_training else 0)
        return self.current_x[self.current_step]
    
    def reset(self, seed=None, options=None):
        if seed is not None:
            self.seed(seed)
        # Ensure we have enough data for 240-step horizon
        max_valid_start = self.max_steps - self.forecast_horizon
        if max_valid_start < 0:
            max_valid_start = 0
        self.current_step = (self.np_random.randint(0, max_valid_start + 1)
                             if self.is_training and max_valid_start > 0 else 0)
        obs = self.current_x[self.current_step]
        # Return target 240 steps ahead
        target_idx = min(self.current_step + self.forecast_horizon, self.max_steps)
        return obs, {"target": self.current_y[target_idx].copy()}
    
    def step(self, action):
        # Get target 240 steps ahead
        target_idx = min(self.current_step + self.forecast_horizon, self.max_steps)
        pred, actual = action, self.current_y[target_idx]
        reward = -np.mean((pred - actual)**2)
        
        self.current_step += 1
        done = self.current_step >= self.max_steps
        
        if done:
            self.current_step = (self.np_random.randint(0, self.max_steps) 
                                if self.is_training else 0)
            
        obs = self.current_x[self.current_step]
        return obs, reward, done, False, {"target": actual.copy()}


class TGCN(nn.Module):
    def __init__(self, n_features, n_targets=6, hidden_dim=64):
        super().__init__()
        self.n_targets = n_targets
        self.n_features = n_features
        
        # Graph convolution for spatial dependencies
        self.gcn1 = nn.Linear(n_features, hidden_dim)
        self.gcn2 = nn.Linear(hidden_dim, hidden_dim)
        
        # GRU for temporal dependencies
        self.gru = nn.GRU(hidden_dim, hidden_dim, batch_first=True)
        
        # Decoder for 240-step prediction
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, n_targets)  # Predict all 6 series 240 steps ahead
        )
        
        # Adjacency matrix (learnable or fixed based on correlations)
        self.adj_weight = nn.Parameter(torch.eye(n_targets))
        
    def forward(self, x, seq_len=60):  # Use last 60 minutes of data
        # For single timestep input, create a sequence
        if len(x.shape) == 2:
            # Reshape to [batch_size, seq_len, n_features]
            # If x is just one timestep, repeat it seq_len times
            x = x.unsqueeze(1).repeat(1, seq_len, 1)
        
        # Extract last seq_len timesteps if we have more
        if x.shape[1] > seq_len:
            x_seq = x[:, -seq_len:, :]
        else:
            x_seq = x
        
        # Apply GCN to each timestep
        gcn_outputs = []
        for t in range(x_seq.shape[1]):
            h = F.relu(self.gcn1(x_seq[:, t]))
            h = self.gcn2(h)
            gcn_outputs.append(h)
            
        gcn_out = torch.stack(gcn_outputs, dim=1)
        
        # Temporal modeling
        gru_out, _ = self.gru(gcn_out)
        
        # Decode to predict 240 steps ahead
        predictions = self.decoder(gru_out[:, -1])
        
        return predictions


def make_env(data, train_ratio, seed, forecast_horizon):
    def _init():
        env = TimeSeriesPredictionEnv(data, train_ratio, forecast_horizon)
        env.seed(seed)
        return env
    return _init


def train_tgcn(model, env, epochs=50, batch_size=32):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    
    for epoch in range(epochs):
        total_loss = 0
        for _ in range(100):  # 100 batches per epoch
            # Reset the environment
            obs, info = env.reset()
            
            # Handle vectorized environment output
            if isinstance(obs, np.ndarray) and obs.ndim > 1:
                # Observation is already batched
                obs_batch = obs[:batch_size]
                
                # Extract targets from info - SyncVectorEnv returns a list of dicts
                if isinstance(info, list):
                    # Extract target arrays from the list of dictionaries
                    target_list = [item['target'] for item in info[:batch_size]]
                    # Stack them into a single numpy array
                    target_batch = np.stack(target_list, axis=0)
                else:
                    # Single environment case
                    target_batch = info['target']
                    if target_batch.ndim == 1:
                        target_batch = target_batch.reshape(1, -1)
            else:
                # Non-vectorized environment - shouldn't happen with SyncVectorEnv
                obs_batch = []
                target_batch = []
                for _ in range(batch_size):
                    obs, info = env.reset()
                    obs_batch.append(obs)
                    target_batch.append(info['target'])
                obs_batch = np.array(obs_batch)
                target_batch = np.stack(target_batch, axis=0)
            
            # Ensure we have the right batch size
            actual_batch_size = min(batch_size, obs_batch.shape[0])
            obs_batch = obs_batch[:actual_batch_size]
            target_batch = target_batch[:actual_batch_size]
            
            # Convert to tensors
            obs_tensor = torch.FloatTensor(obs_batch)
            target_tensor = torch.FloatTensor(target_batch)
            
            # Forward pass
            predictions = model(obs_tensor)
            loss = F.mse_loss(predictions, target_tensor)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        print(f"Epoch {epoch}: Loss = {total_loss/100:.4f}")


def test_tgcn_240_ahead(env, model, episodes=5):
    model.eval()
    mse_scores = []
    
    with torch.no_grad():
        for episode in range(episodes):
            done = False
            obs, info = env.reset()
            
            # Handle vectorized environments
            if isinstance(obs, np.ndarray) and obs.ndim > 1:
                # Take first env for testing
                obs = obs[0]
                if isinstance(info, list):
                    info = info[0]
            
            while not done:
                # Predict 240 steps ahead
                obs_tensor = torch.FloatTensor(obs).unsqueeze(0)
                pred = model(obs_tensor).squeeze().numpy()
                
                # Step
                next_obs, reward, done, _, next_info = env.step(pred)
                
                # Handle vectorized environments
                if isinstance(next_obs, np.ndarray) and next_obs.ndim > 1:
                    next_obs = next_obs[0]
                    done = done[0]
                    if isinstance(next_info, list):
                        next_info = next_info[0]
                
                actual = next_info['target']
                mse = np.mean((pred - actual)**2)
                mse_scores.append(mse)
                
                obs = next_obs
                info = next_info
    
    print(f"Average MSE over {episodes} episodes: {np.mean(mse_scores):.4f}")
    return np.mean(mse_scores)


# Main execution
if __name__ == "__main__":
    # Configuration
    data_samples = 30_000
    num_envs = 4
    train_ratio = 0.8
    forecast_horizon = 240
    epochs = 1
    
    # Load data
    print(f"Loading dataset ({data_samples} samples)...")
    data = pd.read_csv("X.csv")
    data = data[:data_samples]
    
    # Determine number of features (all columns except last 6)
    n_features = data.shape[1] - 6
    print(f"Number of features: {n_features}, Number of targets: 6")
    
    # Make environments with 240-step forecast horizon
    print(f"Creating {num_envs} environments...")
    env_fns = [make_env(data, train_ratio, seed=i, forecast_horizon=forecast_horizon) 
               for i in range(num_envs)]
    env = SyncVectorEnv(env_fns)
    
    # Initialize TGCN model
    model = TGCN(n_features=n_features, n_targets=6, hidden_dim=64)
    
    # Train TGCN model
    print("Training TGCN model...")
    train_tgcn(model, env, epochs=epochs, batch_size=num_envs)
    
    # Save the trained model
    model_path = "tgcn_timeseries_240step_model.pt"
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to: {model_path}")
    
    # Test the model
    print("\nTesting the trained model...")
    test_tgcn_240_ahead(env, model, episodes=5)

Loading dataset (30000 samples)...
Number of features: 76, Number of targets: 6
Creating 4 environments...
Training TGCN model...


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.