### GANS


In [134]:
import os
import copy
import math
import time
import numpy as np
import pandas as pd
from typing import Optional, Tuple

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [135]:
os.getcwd()

'/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files'

In [136]:
os.chdir(path = "data/processed_files")

FileNotFoundError: [Errno 2] No such file or directory: 'data/processed_files'

In [137]:
DATA_DIR = os.getcwd()
TRAIN_DIR = os.path.join(DATA_DIR, "train")
VAL_DIR   = os.path.join(DATA_DIR, "valid")
TEST_DIR  = os.path.join(DATA_DIR, "test")

def list_datasets(path):
    return sorted([os.path.join(path, f) for f in os.listdir(path) if f.endswith(".parquet")])

train_datasets = list_datasets(TRAIN_DIR)
val_datasets   = list_datasets(VAL_DIR)
test_datasets  = list_datasets(TEST_DIR)

print("Train:", train_datasets)
print("Val:  ", val_datasets)
print("Test: ", test_datasets)


Train: ['/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/BOVESPA_train.parquet', '/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/FTSE_train.parquet', '/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/MSCI_train.parquet', '/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/NIFTY50_train.parquet', '/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/SHANGHAI_train.parquet']
Val:   ['/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/BOVESPA_valid.parquet', '/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/FTSE_valid.parquet', '/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/MSCI_valid.parquet', '/home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/NIFTY50_valid.parque

Base Model Interface

In [138]:
class BaseGAN:
    def __init__(self, config):
        self.config = config
    
    def train(self, train_data):
        """Train model and update weights."""
        raise NotImplementedError

    def validate(self, val_data):
        """Return validation metric used to tune hyperparameters."""
        raise NotImplementedError

    def generate(self, n_samples):
        """Generate synthetic data."""
        raise NotImplementedError

    def name(self):
        return self.__class__.__name__


In [139]:
def create_windows(data, seq_len):
    X = []
    for i in range(len(data) - seq_len):
        X.append(data[i:i+seq_len])
    return np.array(X)


Metric Functions


In [140]:
from sklearn.metrics import mean_squared_error
from scipy.stats import ks_2samp

def compute_metrics(real, synthetic):
    return {
        "MSE": mean_squared_error(real.flatten(), synthetic.flatten()),
        "KS": np.mean([
            ks_2samp(real[:, i], synthetic[:, i]).pvalue
            for i in range(real.shape[1])
        ])
    }


GAN Placeholders


In [None]:
# class TimeGAN(BaseGAN):
#     def __init__(self, config):
#         super().__init__(config)
#         self.trained_data = None
    
#     def train(self, train_data):
#         print("Training TimeGAN...")
#         self.trained_data = np.asarray(train_data, dtype=np.float32)
    
#     def validate(self, val_data):
#         return np.random.random()  # placeholder
    
#     def generate(self, n_samples):
#         if self.trained_data is None:
#             raise RuntimeError("Model not trained yet")
#         # Return random samples from training data as placeholder
#         if len(self.trained_data) >= n_samples:
#             return self.trained_data[:n_samples]
#         else:
#             # Repeat if not enough samples
#             return np.tile(self.trained_data, (n_samples // len(self.trained_data) + 1, 1))[:n_samples]







### TimeGAN

In [195]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

# -------------------------------
# Embedder Network (RNN-based)
# -------------------------------
class Embedder(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers=3):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.rnn = nn.GRU(
            input_dim, hidden_dim, num_layers,
            batch_first=True
        )
        self.fc = nn.Linear(hidden_dim, hidden_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # x: (batch, seq_len, input_dim)
        h, _ = self.rnn(x)
        h = self.sigmoid(self.fc(h))
        return h

# -------------------------------
# Recovery Network (RNN-based)
# -------------------------------
class Recovery(nn.Module):
    def __init__(self, hidden_dim, output_dim, num_layers=3):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.rnn = nn.GRU(
            hidden_dim, hidden_dim, num_layers,
            batch_first=True
        )
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, h):
        # h: (batch, seq_len, hidden_dim)
        x, _ = self.rnn(h)
        x = self.sigmoid(self.fc(x))
        return x

# -------------------------------
# Generator Network (RNN-based)
# -------------------------------
class Generator(nn.Module):
    def __init__(self, noise_dim, hidden_dim, num_layers=3):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.rnn = nn.GRU(
            noise_dim, hidden_dim, num_layers,
            batch_first=True
        )
        self.fc = nn.Linear(hidden_dim, hidden_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, z):
        # z: (batch, seq_len, noise_dim)
        h, _ = self.rnn(z)
        h = self.sigmoid(self.fc(h))
        return h

# -------------------------------
# Supervisor Network (for step-ahead prediction)
# -------------------------------
class Supervisor(nn.Module):
    def __init__(self, hidden_dim, num_layers=2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.rnn = nn.GRU(
            hidden_dim, hidden_dim, num_layers,
            batch_first=True
        )
        self.fc = nn.Linear(hidden_dim, hidden_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, h):
        # h: (batch, seq_len, hidden_dim)
        h_out, _ = self.rnn(h)
        h_out = self.sigmoid(self.fc(h_out))
        return h_out

# -------------------------------
# Discriminator Network (RNN-based)
# -------------------------------
class Discriminator(nn.Module):
    def __init__(self, hidden_dim, num_layers=3):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.rnn = nn.GRU(
            hidden_dim, hidden_dim, num_layers,
            batch_first=True
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, h):
        # h: (batch, seq_len, hidden_dim)
        h_out, _ = self.rnn(h)
        y = self.fc(h_out)  # (batch, seq_len, 1)
        return y

# -------------------------------
# TimeGAN Implementation
# -------------------------------
class TimeGAN(BaseGAN):
    def __init__(self, config=None):
        super().__init__(config or {})
        cfg = self.config

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Hyperparameters
        self.hidden_dim = cfg.get("hidden_dim", 24)
        self.noise_dim = cfg.get("noise_dim", 24)
        self.num_layers = cfg.get("num_layers", 3)
        self.lr = cfg.get("lr", 1e-3)
        self.batch_size = cfg.get("batch_size", 128)
        self.epochs = cfg.get("epochs", 50)
        
        # Training iterations for each phase
        self.iterations = cfg.get("iterations", 10000)

        # Data-dependent
        self.seq_len = None
        self.n_features = None
        self.data_min = None
        self.data_max = None
        self.data_mean = None
        self.data_std = None

        # Networks
        self.embedder = None
        self.recovery = None
        self.generator = None
        self.supervisor = None
        self.discriminator = None
        
        # Optimizers
        self.opt_autoencoder = None
        self.opt_supervisor = None
        self.opt_generator = None
        self.opt_discriminator = None

    def _normalize_data(self, data, fit=True):
        if fit:
            self.data_min = np.min(data, axis=(0, 1), keepdims=True)
            self.data_max = np.max(data, axis=(0, 1), keepdims=True)
            self.data_mean = np.mean(data, axis=(0, 1), keepdims=True)
            self.data_std = np.std(data, axis=(0, 1), keepdims=True) + 1e-8
            
            print(f"[TimeGAN] Data range: [{data.min():.4f}, {data.max():.4f}]")
            print(f"[TimeGAN] Mean: {self.data_mean.squeeze():.6f}, Std: {self.data_std.squeeze():.6f}")
        
        # Min-max normalization to [0, 1] (TimeGAN uses sigmoid activations)
        data_range = self.data_max - self.data_min + 1e-8
        normalized = (data - self.data_min) / data_range
        normalized = np.clip(normalized, 0.0, 1.0)
        
        if fit:
            print(f"[TimeGAN] Normalized range: [{normalized.min():.4f}, {normalized.max():.4f}]")
        
        return normalized

    def _denormalize_data(self, data):
        data_range = self.data_max - self.data_min
        return data * data_range + self.data_min

    def _make_windows(self, series, seq_len):
        T = series.shape[0]
        if T < seq_len:
            raise ValueError(f"Time series too short ({T}) for seq_len={seq_len}")
        
        windows = []
        for i in range(T - seq_len + 1):
            windows.append(series[i:i+seq_len])
        
        return np.array(windows)

    def _build_models(self, seq_len, n_features):
        self.seq_len = seq_len
        self.n_features = n_features

        # Build networks
        self.embedder = Embedder(n_features, self.hidden_dim, self.num_layers).to(self.device)
        self.recovery = Recovery(self.hidden_dim, n_features, self.num_layers).to(self.device)
        self.generator = Generator(self.noise_dim, self.hidden_dim, self.num_layers).to(self.device)
        self.supervisor = Supervisor(self.hidden_dim, 2).to(self.device)
        self.discriminator = Discriminator(self.hidden_dim, self.num_layers).to(self.device)
        
        # Optimizers
        self.opt_autoencoder = optim.Adam(
            list(self.embedder.parameters()) + list(self.recovery.parameters()),
            lr=self.lr
        )
        self.opt_supervisor = optim.Adam(
            list(self.supervisor.parameters()) + list(self.generator.parameters()),
            lr=self.lr
        )
        self.opt_generator = optim.Adam(
            list(self.generator.parameters()) + list(self.supervisor.parameters()),
            lr=self.lr
        )
        self.opt_discriminator = optim.Adam(
            self.discriminator.parameters(),
            lr=self.lr
        )
        
        print(f"[TimeGAN] Models built:")
        print(f"  Embedder: {sum(p.numel() for p in self.embedder.parameters()):,}")
        print(f"  Recovery: {sum(p.numel() for p in self.recovery.parameters()):,}")
        print(f"  Generator: {sum(p.numel() for p in self.generator.parameters()):,}")
        print(f"  Supervisor: {sum(p.numel() for p in self.supervisor.parameters()):,}")
        print(f"  Discriminator: {sum(p.numel() for p in self.discriminator.parameters()):,}")

    def train(self, train_data):
        data = np.asarray(train_data, dtype=np.float32)

        if data.ndim == 1:
            data = data[:, None]
        
        # Handle NaN
        if np.isnan(data).any():
            nan_count = np.isnan(data).sum()
            print(f"[TimeGAN] WARNING: Found {nan_count} NaN values, handling...")
            data_df = pd.DataFrame(data)
            data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)
            data = data_df.values
            print(f"[TimeGAN] NaN handling complete")
        
        T, n_features = data.shape
        
        # Choose seq_len
        max_seq_len = T
        if max_seq_len >= 256:
            seq_len = 256
        elif max_seq_len >= 128:
            seq_len = 128
        elif max_seq_len >= 64:
            seq_len = 64
        elif max_seq_len >= 32:
            seq_len = 32
        elif max_seq_len >= 24:
            seq_len = 24
        else:
            seq_len = max(16, max_seq_len)
        
        print(f"[TimeGAN] Creating windows: T={T}, seq_len={seq_len}, n_features={n_features}")
        
        data = self._make_windows(data, seq_len)
        data = self._normalize_data(data, fit=True)
        
        n_windows = data.shape[0]
        print(f"[TimeGAN] Created {n_windows} windows")

        if self.embedder is None:
            self._build_models(seq_len, n_features)

        # Convert to tensors
        data_tensor = torch.tensor(data, dtype=torch.float32).to(self.device)
        
        # ===================================
        # Phase 1: Train Embedder & Recovery
        # ===================================
        print("[TimeGAN] Phase 1: Training Autoencoder...")
        for epoch in range(self.epochs // 2):
            self.embedder.train()
            self.recovery.train()
            
            # Sample random batch
            idx = np.random.permutation(n_windows)[:self.batch_size]
            X = data_tensor[idx]
            
            # Forward
            H = self.embedder(X)
            X_tilde = self.recovery(H)
            
            # Reconstruction loss
            loss_reconstruction = nn.MSELoss()(X, X_tilde)
            
            # Backward
            self.opt_autoencoder.zero_grad()
            loss_reconstruction.backward()
            torch.nn.utils.clip_grad_norm_(
                list(self.embedder.parameters()) + list(self.recovery.parameters()),
                max_norm=1.0
            )
            self.opt_autoencoder.step()
            
            if (epoch + 1) % 10 == 0 or epoch == 0:
                print(f"  Epoch {epoch+1}/{self.epochs//2} | Reconstruction Loss: {loss_reconstruction.item():.4f}")
        
        # ===================================
        # Phase 2: Train Supervisor
        # ===================================
        print("[TimeGAN] Phase 2: Training Supervisor...")
        for epoch in range(self.epochs // 2):
            self.supervisor.train()
            self.generator.train()
            
            idx = np.random.permutation(n_windows)[:self.batch_size]
            X = data_tensor[idx]
            
            # Generate embeddings
            with torch.no_grad():
                H = self.embedder(X)
            
            # Supervised loss: predict next step
            H_supervise = self.supervisor(H)
            loss_supervisor = nn.MSELoss()(H[:, 1:, :], H_supervise[:, :-1, :])
            
            # Backward
            self.opt_supervisor.zero_grad()
            loss_supervisor.backward()
            torch.nn.utils.clip_grad_norm_(
                list(self.supervisor.parameters()) + list(self.generator.parameters()),
                max_norm=1.0
            )
            self.opt_supervisor.step()
            
            if (epoch + 1) % 10 == 0 or epoch == 0:
                print(f"  Epoch {epoch+1}/{self.epochs//2} | Supervisor Loss: {loss_supervisor.item():.4f}")
        
        # ===================================
        # Phase 3: Joint Training (GAN)
        # ===================================
        print("[TimeGAN] Phase 3: Joint Adversarial Training...")
        for epoch in range(self.epochs):
            g_losses = []
            d_losses = []
            
            for _ in range(min(6, n_windows // self.batch_size)):
                idx = np.random.permutation(n_windows)[:self.batch_size]
                X = data_tensor[idx]
                
                # ===== Train Discriminator =====
                self.discriminator.train()
                
                # Real data
                with torch.no_grad():
                    H_real = self.embedder(X)
                
                # Fake data
                Z = torch.randn(self.batch_size, self.seq_len, self.noise_dim).to(self.device)
                with torch.no_grad():
                    E_hat = self.generator(Z)
                    H_hat = self.supervisor(E_hat)
                
                # Discriminator loss
                y_real = self.discriminator(H_real)
                y_fake = self.discriminator(H_hat)
                
                d_loss_real = nn.BCEWithLogitsLoss()(y_real, torch.ones_like(y_real))
                d_loss_fake = nn.BCEWithLogitsLoss()(y_fake, torch.zeros_like(y_fake))
                d_loss = d_loss_real + d_loss_fake
                
                self.opt_discriminator.zero_grad()
                d_loss.backward()
                torch.nn.utils.clip_grad_norm_(self.discriminator.parameters(), max_norm=1.0)
                self.opt_discriminator.step()
                
                d_losses.append(d_loss.item())
                
                # ===== Train Generator =====
                self.generator.train()
                self.supervisor.train()
                
                # Generate fake data
                Z = torch.randn(self.batch_size, self.seq_len, self.noise_dim).to(self.device)
                E_hat = self.generator(Z)
                H_hat = self.supervisor(E_hat)
                
                # Reconstruct
                with torch.no_grad():
                    H_real = self.embedder(X)
                H_supervise = self.supervisor(H_real)
                
                # Generator losses
                y_fake_g = self.discriminator(H_hat)
                g_loss_u = nn.BCEWithLogitsLoss()(y_fake_g, torch.ones_like(y_fake_g))
                g_loss_s = nn.MSELoss()(H_real[:, 1:, :], H_supervise[:, :-1, :])
                
                # Moment matching loss
                g_loss_v1 = torch.mean(torch.abs(torch.mean(H_real, dim=0) - torch.mean(H_hat, dim=0)))
                g_loss_v2 = torch.mean(torch.abs(torch.std(H_real, dim=0) - torch.std(H_hat, dim=0)))
                g_loss_v = g_loss_v1 + g_loss_v2
                
                # Total generator loss
                g_loss = g_loss_u + 100 * torch.sqrt(g_loss_s) + 100 * g_loss_v
                
                self.opt_generator.zero_grad()
                g_loss.backward()
                torch.nn.utils.clip_grad_norm_(
                    list(self.generator.parameters()) + list(self.supervisor.parameters()),
                    max_norm=1.0
                )
                self.opt_generator.step()
                
                g_losses.append(g_loss.item())
            
            if len(d_losses) > 0 and len(g_losses) > 0:
                avg_d = np.mean(d_losses)
                avg_g = np.mean(g_losses)
                
                if (epoch + 1) % 5 == 0 or epoch == 0:
                    print(f"[TimeGAN] Epoch {epoch+1}/{self.epochs} | D={avg_d:.4f} | G={avg_g:.4f}")

    def validate(self, val_data):
        data = np.asarray(val_data, dtype=np.float32)
        
        if data.ndim == 1:
            data = data[:, None]
        
        if np.isnan(data).any():
            data_df = pd.DataFrame(data)
            data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)
            data = data_df.values
        
        if self.embedder is None:
            raise RuntimeError("Model not trained yet")
        
        T = data.shape[0]
        
        if T < self.seq_len:
            temp_seq_len = max(16, T)
            if temp_seq_len < 16:
                return 0.0
            
            data = self._make_windows(data, temp_seq_len)
            data = self._normalize_data(data, fit=False)
            
            n_windows, _, n_features = data.shape
            padded_data = np.zeros((n_windows, self.seq_len, n_features), dtype=np.float32)
            padded_data[:, :temp_seq_len, :] = data
            data = padded_data
        else:
            data = self._make_windows(data, self.seq_len)
            data = self._normalize_data(data, fit=False)
        
        real = torch.tensor(data, dtype=torch.float32).to(self.device)
        
        self.embedder.eval()
        self.discriminator.eval()
        
        with torch.no_grad():
            batch_size = min(self.batch_size, real.size(0))
            H_real = self.embedder(real[:batch_size])
            y_real = self.discriminator(H_real)
            score = torch.sigmoid(y_real).mean().item()
        
        self.embedder.train()
        self.discriminator.train()
        
        return score

    def generate(self, n_samples):
        if self.generator is None:
            raise RuntimeError("Model not trained yet")

        self.generator.eval()
        self.supervisor.eval()
        self.recovery.eval()
        
        n_windows = max(1, (n_samples + self.seq_len - 1) // self.seq_len)
        
        out = []
        with torch.no_grad():
            for i in range(0, n_windows, self.batch_size):
                b = min(self.batch_size, n_windows - i)
                Z = torch.randn(b, self.seq_len, self.noise_dim).to(self.device)
                E_hat = self.generator(Z)
                H_hat = self.supervisor(E_hat)
                X_hat = self.recovery(H_hat)
                out.append(X_hat.cpu().numpy())

        self.generator.train()
        self.supervisor.train()
        self.recovery.train()
        
        windows = np.concatenate(out, axis=0)
        windows = self._denormalize_data(windows)
        
        reconstructed = windows.reshape(-1, self.n_features)
        reconstructed = reconstructed[:n_samples]
        
        if reconstructed.shape[0] < n_samples:
            padding = np.repeat(reconstructed[-1:], n_samples - reconstructed.shape[0], axis=0)
            reconstructed = np.vstack([reconstructed, padding])
        
        return reconstructed

### QuantGAN

In [185]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

# -------------------------------
# Causal Convolution with proper length preservation
# -------------------------------
class CausalConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation=1):
        super().__init__()
        self.padding = (kernel_size - 1) * dilation
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, 
                             padding=self.padding, dilation=dilation)
    
    def forward(self, x):
        # Apply conv with left padding
        out = self.conv(x)
        # Remove right padding to maintain causality and original length
        if self.padding > 0:
            out = out[:, :, :-self.padding]
        return out

# -------------------------------
# Temporal Block with Causal Convolutions
# -------------------------------
class TemporalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout=0.2):
        super().__init__()
        
        self.conv1 = CausalConv1d(in_channels, out_channels, kernel_size, dilation)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        
        self.conv2 = CausalConv1d(out_channels, out_channels, kernel_size, dilation)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        
        self.net = nn.Sequential(
            self.conv1, self.bn1, self.relu1, self.dropout1,
            self.conv2, self.bn2, self.relu2, self.dropout2
        )
        
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

# -------------------------------
# Temporal Convolutional Network
# -------------------------------
class TemporalConvNet(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=3, dropout=0.2):
        super().__init__()
        layers = []
        num_levels = len(num_channels)
        
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_channels = num_inputs if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            
            layers.append(TemporalBlock(
                in_channels, out_channels, kernel_size, 
                dilation=dilation_size, dropout=dropout
            ))
        
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# -------------------------------
# QuantGAN Generator (TCN-based)
# -------------------------------
class QuantGAN_Generator(nn.Module):
    def __init__(self, noise_dim, seq_len, n_features, hidden_channels=[64, 64, 32]):
        super().__init__()
        self.seq_len = seq_len
        self.n_features = n_features
        
        # Project noise to initial sequence
        self.fc = nn.Linear(noise_dim, hidden_channels[0] * (seq_len // 4))
        
        # Upsample to target length
        self.upsample1 = nn.ConvTranspose1d(hidden_channels[0], hidden_channels[0], 4, stride=2, padding=1)
        self.bn1 = nn.BatchNorm1d(hidden_channels[0])
        self.relu1 = nn.ReLU()
        
        self.upsample2 = nn.ConvTranspose1d(hidden_channels[0], hidden_channels[1], 4, stride=2, padding=1)
        self.bn2 = nn.BatchNorm1d(hidden_channels[1])
        self.relu2 = nn.ReLU()
        
        # TCN for temporal dependencies
        self.tcn = TemporalConvNet(
            num_inputs=hidden_channels[1],
            num_channels=hidden_channels[1:],
            kernel_size=3,
            dropout=0.2
        )
        
        # Final projection
        self.output = nn.Sequential(
            nn.Conv1d(hidden_channels[-1], n_features, 1),
            nn.Tanh()
        )

    def forward(self, z):
        batch_size = z.size(0)
        
        # Project and reshape
        x = self.fc(z)
        x = x.view(batch_size, -1, self.seq_len // 4)
        
        # Upsample
        x = self.relu1(self.bn1(self.upsample1(x)))
        x = self.relu2(self.bn2(self.upsample2(x)))
        
        # Ensure correct length
        if x.size(2) != self.seq_len:
            x = x[:, :, :self.seq_len]
        
        # Apply TCN (maintains length due to causal padding)
        x = self.tcn(x)
        
        # Final adjustment to ensure exact length
        if x.size(2) != self.seq_len:
            x = x[:, :, :self.seq_len]
        
        # Output
        x = self.output(x)
        
        return x.permute(0, 2, 1)

# -------------------------------
# QuantGAN Discriminator (TCN-based)
# -------------------------------
class QuantGAN_Discriminator(nn.Module):
    def __init__(self, seq_len, n_features, hidden_channels=[32, 64, 128]):
        super().__init__()
        
        self.tcn = TemporalConvNet(
            num_inputs=n_features,
            num_channels=hidden_channels,
            kernel_size=3,
            dropout=0.2
        )
        
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
            nn.Linear(hidden_channels[-1], 64),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = self.tcn(x)
        x = self.classifier(x)
        return x

# -------------------------------
# QuantGAN (WGAN-GP with TCN)
# -------------------------------
class QuantGAN(BaseGAN):
    def __init__(self, config=None):
        super().__init__(config or {})
        cfg = self.config

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.noise_dim = cfg.get("noise_dim", 100)
        self.lr_g = cfg.get("lr_g", 1e-4)
        self.lr_d = cfg.get("lr_d", 1e-4)
        self.batch_size = cfg.get("batch_size", 64)
        self.epochs = cfg.get("epochs", 50)
        self.lambda_gp = cfg.get("lambda_gp", 10.0)
        self.n_critic = cfg.get("n_critic", 3)
        self.hidden_channels_g = cfg.get("hidden_channels_g", [64, 64, 32])
        self.hidden_channels_d = cfg.get("hidden_channels_d", [32, 64, 128])

        self.seq_len = None
        self.n_features = None
        self.data_min = None
        self.data_max = None
        self.data_mean = None
        self.data_std = None

        self.G = None
        self.D = None
        self.opt_G = None
        self.opt_D = None

    def _normalize_data(self, data, fit=True):
        if fit:
            self.data_min = np.min(data, axis=(0, 1), keepdims=True)
            self.data_max = np.max(data, axis=(0, 1), keepdims=True)
            self.data_mean = np.mean(data, axis=(0, 1), keepdims=True)
            self.data_std = np.std(data, axis=(0, 1), keepdims=True) + 1e-8
            
            print(f"[QuantGAN] Data range: [{data.min():.4f}, {data.max():.4f}]")
            print(f"[QuantGAN] Mean: {self.data_mean.squeeze():.6f}, Std: {self.data_std.squeeze():.6f}")
        
        data_range = self.data_max - self.data_min + 1e-8
        normalized = 2 * (data - self.data_min) / data_range - 1
        normalized = np.clip(normalized, -0.99, 0.99)
        
        if fit:
            print(f"[QuantGAN] Normalized range: [{normalized.min():.4f}, {normalized.max():.4f}]")
        
        return normalized

    def _denormalize_data(self, data):
        data_range = self.data_max - self.data_min
        return (data + 1) / 2 * data_range + self.data_min

    def _make_windows(self, series, seq_len):
        T = series.shape[0]
        if T < seq_len:
            raise ValueError(f"Time series too short ({T}) for seq_len={seq_len}")
        
        windows = []
        for i in range(T - seq_len + 1):
            windows.append(series[i:i+seq_len])
        
        return np.array(windows)

    def _build_models(self, seq_len, n_features):
        self.seq_len = seq_len
        self.n_features = n_features

        self.G = QuantGAN_Generator(
            self.noise_dim, seq_len, n_features, self.hidden_channels_g
        ).to(self.device)

        self.D = QuantGAN_Discriminator(
            seq_len, n_features, self.hidden_channels_d
        ).to(self.device)

        self.opt_G = optim.Adam(self.G.parameters(), lr=self.lr_g, betas=(0.0, 0.9))
        self.opt_D = optim.Adam(self.D.parameters(), lr=self.lr_d, betas=(0.0, 0.9))
        
        self._init_weights(self.G)
        self._init_weights(self.D)
        
        print(f"[QuantGAN] Models built - G params: {sum(p.numel() for p in self.G.parameters()):,}")
        print(f"[QuantGAN] Models built - D params: {sum(p.numel() for p in self.D.parameters()):,}")
    
    def _init_weights(self, m):
        if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d, nn.Linear)):
            nn.init.normal_(m.weight, 0.0, 0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.BatchNorm1d, nn.LayerNorm)):
            if hasattr(m, 'weight') and m.weight is not None:
                nn.init.normal_(m.weight, 1.0, 0.02)
            if hasattr(m, 'bias') and m.bias is not None:
                nn.init.constant_(m.bias, 0)

    def _gradient_penalty(self, real, fake):
        batch_size = real.size(0)
        alpha = torch.rand(batch_size, 1, 1, device=self.device)
        
        interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
        d_interp = self.D(interp)
        
        gradients = torch.autograd.grad(
            outputs=d_interp,
            inputs=interp,
            grad_outputs=torch.ones_like(d_interp),
            create_graph=True,
            retain_graph=True,
            only_inputs=True
        )[0]
        
        gradients = gradients.reshape(batch_size, -1)
        grad_norm = gradients.norm(2, dim=1)
        penalty = torch.mean((grad_norm - 1) ** 2)
        
        return penalty

    def train(self, train_data):
        data = np.asarray(train_data, dtype=np.float32)

        if data.ndim == 1:
            data = data[:, None]
        
        if np.isnan(data).any():
            nan_count = np.isnan(data).sum()
            print(f"[QuantGAN] WARNING: Found {nan_count} NaN values, handling...")
            data_df = pd.DataFrame(data)
            data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)
            data = data_df.values
            print(f"[QuantGAN] NaN handling complete")
        
        T, n_features = data.shape
        
        max_seq_len = (T // 4) * 4
        if max_seq_len < 16:
            raise ValueError(f"Time series too short ({T}) for QuantGAN")
        
        if max_seq_len >= 256:
            seq_len = 256
        elif max_seq_len >= 128:
            seq_len = 128
        elif max_seq_len >= 64:
            seq_len = 64
        elif max_seq_len >= 32:
            seq_len = 32
        else:
            seq_len = max_seq_len
        
        print(f"[QuantGAN] Creating windows: T={T}, seq_len={seq_len}, n_features={n_features}")
        
        data = self._make_windows(data, seq_len)
        data = self._normalize_data(data, fit=True)
        
        n_windows = data.shape[0]
        print(f"[QuantGAN] Created {n_windows} windows")

        if self.G is None:
            self._build_models(seq_len, n_features)

        dataset = TensorDataset(torch.tensor(data, dtype=torch.float32))
        loader = DataLoader(dataset, self.batch_size, shuffle=True, drop_last=True)

        print(f"[QuantGAN] Starting training with {len(loader)} batches per epoch")
        
        for epoch in range(self.epochs):
            d_losses = []
            g_losses = []
            gp_losses = []
            
            for batch_idx, (real,) in enumerate(loader):
                real = real.to(self.device)
                batch_size = real.size(0)

                for _ in range(self.n_critic):
                    self.opt_D.zero_grad()
                    
                    z = torch.randn(batch_size, self.noise_dim, device=self.device)
                    
                    with torch.no_grad():
                        fake = self.G(z)
                    
                    d_real = self.D(real)
                    d_fake = self.D(fake)
                    
                    d_loss = d_fake.mean() - d_real.mean()
                    gp = self._gradient_penalty(real, fake)
                    d_total = d_loss + self.lambda_gp * gp
                    
                    if not (torch.isnan(d_total) or torch.isinf(d_total) or abs(d_total.item()) > 1e6):
                        d_total.backward()
                        torch.nn.utils.clip_grad_norm_(self.D.parameters(), max_norm=1.0)
                        self.opt_D.step()
                
                d_losses.append(d_total.item())
                gp_losses.append(gp.item())

                self.opt_G.zero_grad()
                z = torch.randn(batch_size, self.noise_dim, device=self.device)
                fake = self.G(z)
                g_loss = -self.D(fake).mean()
                
                if not (torch.isnan(g_loss) or torch.isinf(g_loss) or abs(g_loss.item()) > 1e6):
                    g_loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.G.parameters(), max_norm=1.0)
                    self.opt_G.step()
                    g_losses.append(g_loss.item())

            if len(d_losses) > 0 and len(g_losses) > 0:
                avg_d = np.mean(d_losses)
                avg_g = np.mean(g_losses)
                avg_gp = np.mean(gp_losses)
                
                if (epoch + 1) % 5 == 0 or epoch == 0:
                    print(f"[QuantGAN] Epoch {epoch+1}/{self.epochs} | D={avg_d:.4f} | G={avg_g:.4f} | GP={avg_gp:.4f}")

    def validate(self, val_data):
        data = np.asarray(val_data, dtype=np.float32)
        
        if data.ndim == 1:
            data = data[:, None]
        
        if np.isnan(data).any():
            data_df = pd.DataFrame(data)
            data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)
            data = data_df.values
        
        if self.seq_len is None:
            raise RuntimeError("Model not trained yet")
        
        T = data.shape[0]
        
        if T < self.seq_len:
            temp_seq_len = (T // 4) * 4
            if temp_seq_len < 16:
                return 0.0
            
            data = self._make_windows(data, temp_seq_len)
            data = self._normalize_data(data, fit=False)
            
            n_windows, _, n_features = data.shape
            padded_data = np.zeros((n_windows, self.seq_len, n_features), dtype=np.float32)
            padded_data[:, :temp_seq_len, :] = data
            data = padded_data
        else:
            data = self._make_windows(data, self.seq_len)
            data = self._normalize_data(data, fit=False)
        
        real = torch.tensor(data, dtype=torch.float32).to(self.device)
        
        self.G.eval()
        self.D.eval()
        
        with torch.no_grad():
            batch_size = min(self.batch_size, real.size(0))
            z = torch.randn(batch_size, self.noise_dim, device=self.device)
            fake = self.G(z)
            score = (self.D(real[:batch_size]).mean() - self.D(fake).mean()).item()
        
        self.G.train()
        self.D.train()
        
        return score

    def generate(self, n_samples):
        if self.G is None:
            raise RuntimeError("Model not trained yet")

        self.G.eval()
        
        n_windows = max(1, (n_samples + self.seq_len - 1) // self.seq_len)
        
        out = []
        with torch.no_grad():
            for i in range(0, n_windows, self.batch_size):
                b = min(self.batch_size, n_windows - i)
                z = torch.randn(b, self.noise_dim, device=self.device)
                fake_windows = self.G(z).cpu().numpy()
                out.append(fake_windows)

        self.G.train()
        windows = np.concatenate(out, axis=0)
        
        windows = self._denormalize_data(windows)
        reconstructed = windows.reshape(-1, self.n_features)
        reconstructed = reconstructed[:n_samples]
        
        if reconstructed.shape[0] < n_samples:
            padding = np.repeat(reconstructed[-1:], n_samples - reconstructed.shape[0], axis=0)
            reconstructed = np.vstack([reconstructed, padding])
        
        return reconstructed

### FINGAN


In [143]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

# -------------------------------
# Residual Block
# -------------------------------
class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv1d(channels, channels, 3, padding=1),
            nn.BatchNorm1d(channels),
            nn.LeakyReLU(0.2),
            nn.Conv1d(channels, channels, 3, padding=1),
            nn.BatchNorm1d(channels)
        )
        self.act = nn.LeakyReLU(0.2)

    def forward(self, x):
        return self.act(x + self.block(x))

# -------------------------------
# Generator (Deconv CNN)
# -------------------------------
class FinGAN_Generator(nn.Module):
    def __init__(self, noise_dim, seq_len, n_features, base_channels=64):
        super().__init__()
        assert seq_len % 8 == 0, "seq_len must be divisible by 8"

        self.start_len = seq_len // 8

        self.fc = nn.Sequential(
            nn.Linear(noise_dim, base_channels * self.start_len),
            nn.BatchNorm1d(base_channels * self.start_len),
            nn.LeakyReLU(0.2)
        )

        self.net = nn.Sequential(
            nn.ConvTranspose1d(base_channels, base_channels // 2, 4, 2, 1),
            nn.BatchNorm1d(base_channels // 2),
            nn.LeakyReLU(0.2),
            nn.ConvTranspose1d(base_channels // 2, base_channels // 4, 4, 2, 1),
            nn.BatchNorm1d(base_channels // 4),
            nn.LeakyReLU(0.2),
            nn.ConvTranspose1d(base_channels // 4, n_features, 4, 2, 1),
            nn.Tanh()
        )

    def forward(self, z):
        x = self.fc(z)
        x = x.view(z.size(0), -1, self.start_len)
        x = self.net(x)
        return x.permute(0, 2, 1)

# -------------------------------
# Critic (CNN)
# -------------------------------
class FinGAN_Critic(nn.Module):
    def __init__(self, seq_len, n_features, base_channels=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(n_features, base_channels, 5, padding=2),
            nn.LayerNorm([base_channels, seq_len]),
            nn.LeakyReLU(0.2),
            nn.Conv1d(base_channels, base_channels * 2, 4, stride=2, padding=1),
            nn.LayerNorm([base_channels * 2, seq_len // 2]),
            nn.LeakyReLU(0.2),
            nn.Conv1d(base_channels * 2, base_channels * 4, 4, stride=2, padding=1),
            nn.LayerNorm([base_channels * 4, seq_len // 4]),
            nn.LeakyReLU(0.2),
            nn.Flatten(),
            nn.Linear((seq_len // 4) * base_channels * 4, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        return self.net(x.permute(0, 2, 1))

# -------------------------------
# FIN-GAN (WGAN-GP) - inherits from BaseGAN
# -------------------------------
class FinGAN(BaseGAN):
    def __init__(self, config=None):
        super().__init__(config or {})
        cfg = self.config

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.noise_dim = cfg.get("noise_dim", 100)
        self.lr_g = cfg.get("lr_g", 1e-4)
        self.lr_d = cfg.get("lr_d", 1e-4)
        self.batch_size = cfg.get("batch_size", 64)
        self.epochs = cfg.get("epochs", 50)
        self.lambda_gp = cfg.get("lambda_gp", 10.0)
        self.n_critic = cfg.get("n_critic", 3)
        self.base_channels = cfg.get("base_channels", 32)

        self.seq_len = None
        self.n_features = None
        self.data_min = None
        self.data_max = None
        self.data_mean = None
        self.data_std = None

        self.G = None
        self.D = None
        self.opt_G = None
        self.opt_D = None

    def _normalize_data(self, data, fit=True):
        if fit:
            self.data_min = np.min(data, axis=(0, 1), keepdims=True)
            self.data_max = np.max(data, axis=(0, 1), keepdims=True)
            self.data_mean = np.mean(data, axis=(0, 1), keepdims=True)
            self.data_std = np.std(data, axis=(0, 1), keepdims=True) + 1e-8
            
            print(f"[FinGAN] Data range: [{data.min():.4f}, {data.max():.4f}]")
            print(f"[FinGAN] Mean: {self.data_mean.squeeze():.6f}, Std: {self.data_std.squeeze():.6f}")
        
        data_range = self.data_max - self.data_min + 1e-8
        normalized = 2 * (data - self.data_min) / data_range - 1
        normalized = np.clip(normalized, -0.99, 0.99)
        
        if fit:
            print(f"[FinGAN] Normalized range: [{normalized.min():.4f}, {normalized.max():.4f}]")
        
        return normalized

    def _denormalize_data(self, data):
        data_range = self.data_max - self.data_min
        return (data + 1) / 2 * data_range + self.data_min

    def _make_windows(self, series, seq_len):
        T = series.shape[0]
        if T < seq_len:
            raise ValueError(f"Time series too short ({T}) for seq_len={seq_len}")
        
        windows = []
        for i in range(T - seq_len + 1):
            windows.append(series[i:i+seq_len])
        
        return np.array(windows)

    def _adjust_seq_len(self, data, factor=8):
        n, seq_len, n_features = data.shape
        target_len = (seq_len // factor) * factor
        if target_len != seq_len:
            print(f"[FinGAN] Truncated seq_len {seq_len} â†’ {target_len}")
        return data[:, :target_len, :]

    def _build_models(self, seq_len, n_features):
        self.seq_len = seq_len
        self.n_features = n_features

        self.G = FinGAN_Generator(
            self.noise_dim, seq_len, n_features, self.base_channels
        ).to(self.device)

        self.D = FinGAN_Critic(
            seq_len, n_features, self.base_channels
        ).to(self.device)

        self.opt_G = optim.Adam(self.G.parameters(), lr=self.lr_g, betas=(0.0, 0.9))
        self.opt_D = optim.Adam(self.D.parameters(), lr=self.lr_d, betas=(0.0, 0.9))
        
        self._init_weights(self.G)
        self._init_weights(self.D)
        
        print(f"[FinGAN] Models built - G params: {sum(p.numel() for p in self.G.parameters()):,}")
        print(f"[FinGAN] Models built - D params: {sum(p.numel() for p in self.D.parameters()):,}")
    
    def _init_weights(self, m):
        if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d, nn.Linear)):
            nn.init.normal_(m.weight, 0.0, 0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.BatchNorm1d, nn.LayerNorm)):
            if hasattr(m, 'weight') and m.weight is not None:
                nn.init.normal_(m.weight, 1.0, 0.02)
            if hasattr(m, 'bias') and m.bias is not None:
                nn.init.constant_(m.bias, 0)

    def _gradient_penalty(self, real, fake):
        batch_size = real.size(0)
        alpha = torch.rand(batch_size, 1, 1, device=self.device)
        
        interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
        d_interp = self.D(interp)
        
        gradients = torch.autograd.grad(
            outputs=d_interp,
            inputs=interp,
            grad_outputs=torch.ones_like(d_interp),
            create_graph=True,
            retain_graph=True,
            only_inputs=True
        )[0]
        
        gradients = gradients.reshape(batch_size, -1)
        grad_norm = gradients.norm(2, dim=1)
        penalty = torch.mean((grad_norm - 1) ** 2)
        
        return penalty

    def train(self, train_data):
        data = np.asarray(train_data, dtype=np.float32)

        if data.ndim == 1:
            data = data[:, None]
        
        if np.isnan(data).any():
            nan_count = np.isnan(data).sum()
            print(f"[WARNING] Found {nan_count} NaN values, handling...")
            data_df = pd.DataFrame(data)
            data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)
            data = data_df.values
            print(f"[FinGAN] NaN handling complete")
        
        T, n_features = data.shape
        
        max_seq_len = (T // 8) * 8
        if max_seq_len < 8:
            raise ValueError(f"Time series too short ({T}) for FIN-GAN")
        
        if max_seq_len >= 256:
            seq_len = 256
        elif max_seq_len >= 128:
            seq_len = 128
        elif max_seq_len >= 64:
            seq_len = 64
        elif max_seq_len >= 32:
            seq_len = 32
        elif max_seq_len >= 16:
            seq_len = 16
        else:
            seq_len = max_seq_len
        
        print(f"[FinGAN] Creating windows: T={T}, seq_len={seq_len}, n_features={n_features}")
        
        data = self._make_windows(data, seq_len)
        
        if np.isnan(data).any():
            raise ValueError("NaN after windowing")
        
        data = self._normalize_data(data, fit=True)
        
        if np.isnan(data).any():
            raise ValueError("NaN after normalization")
        
        n_windows = data.shape[0]
        print(f"[FinGAN] Created {n_windows} windows")

        if self.G is None:
            self._build_models(seq_len, n_features)

        dataset = TensorDataset(torch.tensor(data, dtype=torch.float32))
        loader = DataLoader(dataset, self.batch_size, shuffle=True, drop_last=True)

        print(f"[FinGAN] Starting training with {len(loader)} batches per epoch")
        
        for epoch in range(self.epochs):
            d_losses = []
            g_losses = []
            gp_losses = []
            
            for batch_idx, (real,) in enumerate(loader):
                real = real.to(self.device)
                batch_size = real.size(0)
                
                if torch.isnan(real).any():
                    print(f"[ERROR] NaN in batch {batch_idx}")
                    continue

                for critic_iter in range(self.n_critic):
                    self.opt_D.zero_grad()
                    
                    z = torch.randn(batch_size, self.noise_dim, device=self.device)
                    
                    with torch.no_grad():
                        fake = self.G(z)
                    
                    d_real = self.D(real)
                    d_fake = self.D(fake)
                    
                    d_loss = d_fake.mean() - d_real.mean()
                    gp = self._gradient_penalty(real, fake)
                    d_total = d_loss + self.lambda_gp * gp
                    
                    if torch.isnan(d_total) or torch.isinf(d_total) or abs(d_total.item()) > 1e6:
                        print(f"[ERROR] Extreme D loss at epoch {epoch+1}, batch {batch_idx}")
                        break
                    
                    d_total.backward()
                    torch.nn.utils.clip_grad_norm_(self.D.parameters(), max_norm=0.5)
                    self.opt_D.step()
                
                d_losses.append(d_total.item())
                gp_losses.append(gp.item())

                self.opt_G.zero_grad()
                z = torch.randn(batch_size, self.noise_dim, device=self.device)
                fake = self.G(z)
                g_loss = -self.D(fake).mean()
                
                if torch.isnan(g_loss) or torch.isinf(g_loss) or abs(g_loss.item()) > 1e6:
                    print(f"[ERROR] Extreme G loss")
                    continue
                
                g_loss.backward()
                torch.nn.utils.clip_grad_norm_(self.G.parameters(), max_norm=0.5)
                self.opt_G.step()
                
                g_losses.append(g_loss.item())

            if len(d_losses) == 0 or len(g_losses) == 0:
                print(f"[ERROR] No valid losses in epoch {epoch+1}")
                break
                
            avg_d = np.mean(d_losses)
            avg_g = np.mean(g_losses)
            avg_gp = np.mean(gp_losses)
            
            if (epoch + 1) % 5 == 0 or epoch == 0:
                print(f"[FinGAN] Epoch {epoch+1}/{self.epochs} | D={avg_d:.4f} | G={avg_g:.4f} | GP={avg_gp:.4f}")

    def validate(self, val_data):
        data = np.asarray(val_data, dtype=np.float32)
        
        if data.ndim == 1:
            data = data[:, None]
        
        if np.isnan(data).any():
            data_df = pd.DataFrame(data)
            data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)
            data = data_df.values
        
        if self.seq_len is None:
            raise RuntimeError("Model not trained yet")
        
        T = data.shape[0]
        
        if T < self.seq_len:
            temp_seq_len = (T // 8) * 8
            if temp_seq_len < 8:
                return 0.0
            
            data = self._make_windows(data, temp_seq_len)
            data = self._normalize_data(data, fit=False)
            
            n_windows, _, n_features = data.shape
            padded_data = np.zeros((n_windows, self.seq_len, n_features), dtype=np.float32)
            padded_data[:, :temp_seq_len, :] = data
            data = padded_data
        else:
            data = self._make_windows(data, self.seq_len)
            data = self._normalize_data(data, fit=False)
        
        data = self._adjust_seq_len(data, factor=8)
        real = torch.tensor(data, dtype=torch.float32).to(self.device)
        
        self.G.eval()
        self.D.eval()
        
        with torch.no_grad():
            batch_size = min(self.batch_size, real.size(0))
            z = torch.randn(batch_size, self.noise_dim, device=self.device)
            fake = self.G(z)
            score = (self.D(real[:batch_size]).mean() - self.D(fake).mean()).item()
        
        self.G.train()
        self.D.train()
        
        return score

    def generate(self, n_samples):
        """Generate n_samples timesteps (not windows)"""
        if self.G is None:
            raise RuntimeError("Model not trained yet")

        self.G.eval()
        
        # Generate enough windows to cover n_samples timesteps
        n_windows = max(1, (n_samples + self.seq_len - 1) // self.seq_len)
        
        out = []
        with torch.no_grad():
            for i in range(0, n_windows, self.batch_size):
                b = min(self.batch_size, n_windows - i)
                z = torch.randn(b, self.noise_dim, device=self.device)
                fake_windows = self.G(z).cpu().numpy()
                out.append(fake_windows)

        self.G.train()
        windows = np.concatenate(out, axis=0)
        
        # Denormalize
        windows = self._denormalize_data(windows)
        
        # Flatten windows into continuous time series
        reconstructed = windows.reshape(-1, self.n_features)
        
        # Trim to exact n_samples
        reconstructed = reconstructed[:n_samples]
        
        # Pad if needed
        if reconstructed.shape[0] < n_samples:
            padding = np.repeat(reconstructed[-1:], n_samples - reconstructed.shape[0], axis=0)
            reconstructed = np.vstack([reconstructed, padding])
        
        return reconstructed

Helper: Load dataset

In [196]:
def load_dataset(path):
    df = pd.read_parquet(path)
    print(f"Loaded {path}: shape={df.shape}, NaN count={df.isna().sum().sum()}")
    return df.to_numpy().astype(np.float32)

Training Loop

In [197]:
def train_all_models(models, datasets):
    for ds_path in datasets:
        data = load_dataset(ds_path)
        print(f"\nðŸ“Œ Training on {ds_path}")

        for model in models:
            print(f"--- {model.name()} ---")
            model.train(data)


Validation Loop (Select Hyperparameters)

Goal: Tune hyperparameters and choose best config.

You will later integrate Optuna or grid search.

In [198]:
def validate_models(models, datasets):
    results = []

    for ds_path in datasets:
        data = load_dataset(ds_path)
        print(f"\nðŸ“Œ Validating on {ds_path}")

        for model in models:
            score = model.validate(data)
            print(f"{model.name()} â†’ validation score: {score:.4f}")

            results.append({
                "dataset": ds_path,
                "model": model.name(),
                "score": score
            })
    
    return pd.DataFrame(results)


Test Loop (Final Comparison)

This generates synthetic data and compares it to the real test dataset.

In [199]:
def test_models(models, datasets):
    results = []

    for ds_path in datasets:
        real = load_dataset(ds_path)
        print(f"\nðŸ“Œ Testing on {ds_path}")

        for model in models:
            synthetic = model.generate(len(real))
            metrics = compute_metrics(real, synthetic)
            print(f"{model.name()}: {metrics}")

            results.append({
                "dataset": ds_path,
                "model": model.name(),
                **metrics
            })
    
    return pd.DataFrame(results)



Run the Whole Pipeline

In [204]:
models = [
    TimeGAN(config={"hidden_dim": 24, "noise_dim": 24, "num_layers": 3, "lr": 1e-3, "epochs": 5, "batch_size": 128}),
    QuantGAN(config={"lr_g": 1e-4, "lr_d": 1e-4, "epochs": 5, "noise_dim": 100, "n_critic": 3, "lambda_gp": 10.0}),
    FinGAN(config={"lr": 5e-5, "epochs": 5})
]


In [205]:
train_all_models(models, train_datasets)


Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/BOVESPA_train.parquet: shape=(996, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/BOVESPA_train.parquet
--- TimeGAN ---
[TimeGAN] NaN handling complete
[TimeGAN] Creating windows: T=996, seq_len=256, n_features=1
[TimeGAN] Data range: [-0.4368, 0.2922]
[TimeGAN] Mean: -0.028796, Std: 0.128135
[TimeGAN] Normalized range: [0.0000, 1.0000]
[TimeGAN] Created 741 windows
[TimeGAN] Models built:
  Embedder: 9,744
  Recovery: 10,825
  Generator: 11,400
  Supervisor: 7,800
  Discriminator: 10,825
[TimeGAN] Phase 1: Training Autoencoder...


  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


  Epoch 1/2 | Reconstruction Loss: 0.0316
[TimeGAN] Phase 2: Training Supervisor...
  Epoch 1/2 | Supervisor Loss: 0.0037
[TimeGAN] Phase 2: Training Supervisor...
  Epoch 1/2 | Supervisor Loss: 0.0037
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Epoch 1/5 | D=1.3861 | G=9.1771
[TimeGAN] Epoch 1/5 | D=1.3861 | G=9.1771
[TimeGAN] Epoch 5/5 | D=1.3837 | G=2.2344
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=996, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.4368, 0.2922]
[QuantGAN] Mean: -0.028796, Std: 0.128135
[QuantGAN] Normalized range: [-0.9900, 0.9900]
[QuantGAN] Created 741 windows
[QuantGAN] Models built - G params: 483,329
[QuantGAN] Models built - D params: 115,489
[QuantGAN] Starting training with 11 batches per epoch
[TimeGAN] Epoch 5/5 | D=1.3837 | G=2.2344
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=996, seq_len=256, n_features=1
[Qua

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[QuantGAN] Epoch 1/5 | D=7.7827 | G=0.3330 | GP=0.7796
[QuantGAN] Epoch 5/5 | D=3.2050 | G=1.8577 | GP=0.3179
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=996, seq_len=256, n_features=1
[FinGAN] Data range: [-0.4368, 0.2922]
[FinGAN] Mean: -0.028796, Std: 0.128135
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 741 windows
[FinGAN] Models built - G params: 108,137
[FinGAN] Models built - D params: 1,139,329
[FinGAN] Starting training with 11 batches per epoch
[QuantGAN] Epoch 5/5 | D=3.2050 | G=1.8577 | GP=0.3179
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=996, seq_len=256, n_features=1
[FinGAN] Data range: [-0.4368, 0.2922]
[FinGAN] Mean: -0.028796, Std: 0.128135
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 741 windows
[FinGAN] Models built - G params: 108,137
[FinGAN] Models built - D params: 1,139,329
[FinGAN] Starting training with 11 batches per epoch


  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[FinGAN] Epoch 1/5 | D=-8.8484 | G=4.6469 | GP=0.2021
[FinGAN] Epoch 5/5 | D=-13.0147 | G=-1.5226 | GP=0.2848
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/FTSE_train.parquet: shape=(1000, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/FTSE_train.parquet
--- TimeGAN ---
[TimeGAN] NaN handling complete
[TimeGAN] Creating windows: T=1000, seq_len=256, n_features=1
[TimeGAN] Data range: [-0.6951, 0.3080]
[TimeGAN] Mean: -0.024689, Std: 0.175459
[TimeGAN] Normalized range: [0.0000, 1.0000]
[TimeGAN] Created 745 windows
[TimeGAN] Phase 1: Training Autoencoder...
[FinGAN] Epoch 5/5 | D=-13.0147 | G=-1.5226 | GP=0.2848
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/FTSE_train.parquet: shape=(1000, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/FTSE_train.par

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


  Epoch 1/2 | Reconstruction Loss: 0.0428
[TimeGAN] Phase 2: Training Supervisor...
  Epoch 1/2 | Supervisor Loss: 0.0001
[TimeGAN] Phase 2: Training Supervisor...
  Epoch 1/2 | Supervisor Loss: 0.0001
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Epoch 1/5 | D=1.3886 | G=1.9422
[TimeGAN] Epoch 1/5 | D=1.3886 | G=1.9422
[TimeGAN] Epoch 5/5 | D=1.3847 | G=1.2015
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=1000, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.6951, 0.3080]
[QuantGAN] Mean: -0.024689, Std: 0.175459
[QuantGAN] Normalized range: [-0.9900, 0.9900]
[QuantGAN] Created 745 windows
[QuantGAN] Starting training with 11 batches per epoch
[TimeGAN] Epoch 5/5 | D=1.3847 | G=1.2015
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=1000, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.6951, 0.3080]
[QuantGAN] Mean: -0.024689, Std: 0.175459
[QuantG

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[QuantGAN] Epoch 1/5 | D=3.1752 | G=2.2139 | GP=0.3078
[QuantGAN] Epoch 5/5 | D=1.2045 | G=3.6163 | GP=0.1015
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=1000, seq_len=256, n_features=1
[FinGAN] Data range: [-0.6951, 0.3080]
[FinGAN] Mean: -0.024689, Std: 0.175459
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 745 windows
[FinGAN] Starting training with 11 batches per epoch
[QuantGAN] Epoch 5/5 | D=1.2045 | G=3.6163 | GP=0.1015
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=1000, seq_len=256, n_features=1
[FinGAN] Data range: [-0.6951, 0.3080]
[FinGAN] Mean: -0.024689, Std: 0.175459
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 745 windows
[FinGAN] Starting training with 11 batches per epoch


  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[FinGAN] Epoch 1/5 | D=-16.9868 | G=-1.7938 | GP=0.4414
[FinGAN] Epoch 5/5 | D=-16.2646 | G=-8.8974 | GP=0.4276
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/MSCI_train.parquet: shape=(1004, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/MSCI_train.parquet
--- TimeGAN ---
[TimeGAN] NaN handling complete
[TimeGAN] Creating windows: T=1004, seq_len=256, n_features=1
[TimeGAN] Data range: [-0.5521, 0.6546]
[TimeGAN] Mean: 0.006594, Std: 0.222930
[TimeGAN] Normalized range: [0.0000, 1.0000]
[TimeGAN] Created 749 windows
[TimeGAN] Phase 1: Training Autoencoder...
[FinGAN] Epoch 5/5 | D=-16.2646 | G=-8.8974 | GP=0.4276
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/MSCI_train.parquet: shape=(1004, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/MSCI_train.pa

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


  Epoch 1/2 | Reconstruction Loss: 0.0486
[TimeGAN] Phase 2: Training Supervisor...
  Epoch 1/2 | Supervisor Loss: 0.0000
[TimeGAN] Phase 2: Training Supervisor...
  Epoch 1/2 | Supervisor Loss: 0.0000
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Epoch 1/5 | D=1.3862 | G=1.3938
[TimeGAN] Epoch 1/5 | D=1.3862 | G=1.3938
[TimeGAN] Epoch 5/5 | D=1.3859 | G=1.0149
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=1004, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.5521, 0.6546]
[QuantGAN] Mean: 0.006594, Std: 0.222930
[QuantGAN] Normalized range: [-0.9900, 0.9900]
[QuantGAN] Created 749 windows
[QuantGAN] Starting training with 11 batches per epoch
[TimeGAN] Epoch 5/5 | D=1.3859 | G=1.0149
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=1004, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.5521, 0.6546]
[QuantGAN] Mean: 0.006594, Std: 0.222930
[QuantGAN

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[QuantGAN] Epoch 1/5 | D=0.7619 | G=3.9211 | GP=0.0824
[QuantGAN] Epoch 5/5 | D=-1.6702 | G=6.8889 | GP=0.0663
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=1004, seq_len=256, n_features=1
[FinGAN] Data range: [-0.5521, 0.6546]
[FinGAN] Mean: 0.006594, Std: 0.222930
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 749 windows
[FinGAN] Starting training with 11 batches per epoch
[QuantGAN] Epoch 5/5 | D=-1.6702 | G=6.8889 | GP=0.0663
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=1004, seq_len=256, n_features=1
[FinGAN] Data range: [-0.5521, 0.6546]
[FinGAN] Mean: 0.006594, Std: 0.222930
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 749 windows
[FinGAN] Starting training with 11 batches per epoch


  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[FinGAN] Epoch 1/5 | D=-9.7452 | G=-11.5316 | GP=0.2048
[FinGAN] Epoch 5/5 | D=-9.4836 | G=-21.6810 | GP=0.1859
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/NIFTY50_train.parquet: shape=(993, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/NIFTY50_train.parquet
--- TimeGAN ---
[TimeGAN] NaN handling complete
[TimeGAN] Creating windows: T=993, seq_len=256, n_features=1
[TimeGAN] Data range: [-0.7768, 0.5173]
[TimeGAN] Mean: -0.013686, Std: 0.255715
[TimeGAN] Normalized range: [0.0000, 1.0000]
[TimeGAN] Created 738 windows
[TimeGAN] Phase 1: Training Autoencoder...
[FinGAN] Epoch 5/5 | D=-9.4836 | G=-21.6810 | GP=0.1859
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/NIFTY50_train.parquet: shape=(993, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/NIFTY5

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


  Epoch 1/2 | Reconstruction Loss: 0.0352
[TimeGAN] Phase 2: Training Supervisor...
[TimeGAN] Phase 2: Training Supervisor...
  Epoch 1/2 | Supervisor Loss: 0.0000
[TimeGAN] Phase 3: Joint Adversarial Training...
  Epoch 1/2 | Supervisor Loss: 0.0000
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Epoch 1/5 | D=1.3864 | G=1.1437
[TimeGAN] Epoch 1/5 | D=1.3864 | G=1.1437
[TimeGAN] Epoch 5/5 | D=1.3863 | G=0.9599
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=993, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.7768, 0.5173]
[QuantGAN] Mean: -0.013686, Std: 0.255715
[QuantGAN] Normalized range: [-0.9900, 0.9900]
[QuantGAN] Created 738 windows
[QuantGAN] Starting training with 11 batches per epoch
[TimeGAN] Epoch 5/5 | D=1.3863 | G=0.9599
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=993, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.7768, 0.5173]
[QuantGAN] Mean: -0.013686, Std: 0.255715
[QuantGAN

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[QuantGAN] Epoch 1/5 | D=-0.8934 | G=7.5089 | GP=0.0536
[QuantGAN] Epoch 5/5 | D=0.0885 | G=4.9412 | GP=0.0291
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=993, seq_len=256, n_features=1
[FinGAN] Data range: [-0.7768, 0.5173]
[FinGAN] Mean: -0.013686, Std: 0.255715
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 738 windows
[FinGAN] Starting training with 11 batches per epoch
[QuantGAN] Epoch 5/5 | D=0.0885 | G=4.9412 | GP=0.0291
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=993, seq_len=256, n_features=1
[FinGAN] Data range: [-0.7768, 0.5173]
[FinGAN] Mean: -0.013686, Std: 0.255715
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 738 windows
[FinGAN] Starting training with 11 batches per epoch


  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[FinGAN] Epoch 1/5 | D=-12.4529 | G=-21.7716 | GP=0.2644
[FinGAN] Epoch 5/5 | D=-11.7588 | G=-24.7964 | GP=0.2708
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/SHANGHAI_train.parquet: shape=(969, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/SHANGHAI_train.parquet
--- TimeGAN ---
[TimeGAN] NaN handling complete
[TimeGAN] Creating windows: T=969, seq_len=256, n_features=1
[TimeGAN] Data range: [-0.2997, 0.3034]
[TimeGAN] Mean: -0.032045, Std: 0.087099
[TimeGAN] Normalized range: [0.0000, 1.0000]
[TimeGAN] Created 714 windows
[TimeGAN] Phase 1: Training Autoencoder...
[FinGAN] Epoch 5/5 | D=-11.7588 | G=-24.7964 | GP=0.2708
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/SHANGHAI_train.parquet: shape=(969, 1), NaN count=1

ðŸ“Œ Training on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/train/

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


  Epoch 1/2 | Reconstruction Loss: 0.0399
[TimeGAN] Phase 2: Training Supervisor...
[TimeGAN] Phase 2: Training Supervisor...
  Epoch 1/2 | Supervisor Loss: 0.0000
  Epoch 1/2 | Supervisor Loss: 0.0000
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Phase 3: Joint Adversarial Training...
[TimeGAN] Epoch 1/5 | D=1.3860 | G=1.1566
[TimeGAN] Epoch 1/5 | D=1.3860 | G=1.1566
[TimeGAN] Epoch 5/5 | D=1.3862 | G=0.9303
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=969, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.2997, 0.3034]
[QuantGAN] Mean: -0.032045, Std: 0.087099
[QuantGAN] Normalized range: [-0.9900, 0.9900]
[QuantGAN] Created 714 windows
[QuantGAN] Starting training with 11 batches per epoch
[TimeGAN] Epoch 5/5 | D=1.3862 | G=0.9303
--- QuantGAN ---
[QuantGAN] NaN handling complete
[QuantGAN] Creating windows: T=969, seq_len=256, n_features=1
[QuantGAN] Data range: [-0.2997, 0.3034]
[QuantGAN] Mean: -0.032045, Std: 0.087099
[QuantGAN

  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[QuantGAN] Epoch 1/5 | D=-0.4918 | G=3.7333 | GP=0.0217
[QuantGAN] Epoch 5/5 | D=-0.8683 | G=2.5853 | GP=0.0421
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=969, seq_len=256, n_features=1
[FinGAN] Data range: [-0.2997, 0.3034]
[FinGAN] Mean: -0.032045, Std: 0.087099
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 714 windows
[FinGAN] Starting training with 11 batches per epoch
[QuantGAN] Epoch 5/5 | D=-0.8683 | G=2.5853 | GP=0.0421
--- FinGAN ---
[FinGAN] NaN handling complete
[FinGAN] Creating windows: T=969, seq_len=256, n_features=1
[FinGAN] Data range: [-0.2997, 0.3034]
[FinGAN] Mean: -0.032045, Std: 0.087099
[FinGAN] Normalized range: [-0.9900, 0.9900]
[FinGAN] Created 714 windows
[FinGAN] Starting training with 11 batches per epoch


  data_df = data_df.fillna(method='ffill').fillna(method='bfill').fillna(0.0)


[FinGAN] Epoch 1/5 | D=-7.4301 | G=-25.2681 | GP=0.1448
[FinGAN] Epoch 5/5 | D=-6.9999 | G=-31.1334 | GP=0.1294
[FinGAN] Epoch 5/5 | D=-6.9999 | G=-31.1334 | GP=0.1294


In [206]:
val_results = validate_models(models, val_datasets)
val_results


Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/BOVESPA_valid.parquet: shape=(125, 1), NaN count=0

ðŸ“Œ Validating on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/BOVESPA_valid.parquet
TimeGAN â†’ validation score: 0.5008
QuantGAN â†’ validation score: 3.5410
FinGAN â†’ validation score: 9.1565
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/FTSE_valid.parquet: shape=(125, 1), NaN count=0

ðŸ“Œ Validating on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/FTSE_valid.parquet
TimeGAN â†’ validation score: 0.5007
QuantGAN â†’ validation score: 4.8217
FinGAN â†’ validation score: 12.0362
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/valid/MSCI_valid.parquet: shape=(126, 1), NaN count=0

ðŸ“Œ Validating on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_file

Unnamed: 0,dataset,model,score
0,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,TimeGAN,0.500793
1,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,QuantGAN,3.540963
2,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,FinGAN,9.156475
3,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,TimeGAN,0.500746
4,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,QuantGAN,4.821686
5,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,FinGAN,12.036171
6,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,TimeGAN,0.500776
7,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,QuantGAN,4.570028
8,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,FinGAN,10.210884
9,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,TimeGAN,0.50074


In [207]:
test_results = test_models(models, test_datasets)
test_results


Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/test/BOVESPA_test.parquet: shape=(125, 1), NaN count=0

ðŸ“Œ Testing on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/test/BOVESPA_test.parquet
TimeGAN: {'MSE': 0.005857334937900305, 'KS': np.float64(2.6054291453487907e-38)}
QuantGAN: {'MSE': 0.05963267385959625, 'KS': np.float64(2.439741350869797e-61)}
FinGAN: {'MSE': 0.04294469952583313, 'KS': np.float64(2.1799860619410366e-24)}
Loaded /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/test/FTSE_test.parquet: shape=(125, 1), NaN count=0

ðŸ“Œ Testing on /home/sobottka/BSE/Master_Thesis/bse-thesis-synthetic-data/data/processed_files/test/FTSE_test.parquet
TimeGAN: {'MSE': 0.028821803629398346, 'KS': np.float64(2.1927812846103578e-74)}
QuantGAN: {'MSE': 0.1302771270275116, 'KS': np.float64(5.481953211525894e-72)}
FinGAN: {'MSE': 0.08702515065670013, 'KS': np.float64(2.439741350869797e-61)

Unnamed: 0,dataset,model,MSE,KS
0,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,TimeGAN,0.005857,2.6054289999999995e-38
1,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,QuantGAN,0.059633,2.439741e-61
2,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,FinGAN,0.042945,2.179986e-24
3,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,TimeGAN,0.028822,2.192781e-74
4,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,QuantGAN,0.130277,5.481953e-72
5,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,FinGAN,0.087025,2.439741e-61
6,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,TimeGAN,0.00701,4.577128e-21
7,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,QuantGAN,0.063856,1.450525e-68
8,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,FinGAN,0.046393,6.836782e-23
9,/home/sobottka/BSE/Master_Thesis/bse-thesis-sy...,TimeGAN,0.015402,1.1589859999999999e-20
