In [2]:
import pyarrow as pa
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model
import pyarrow.parquet as pq
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchsde  # For SDE solvers
import numpy as np
import pandas as pd

# Data Preprocessing

In [None]:
# Read in parquet file 
table = pq.read_table("/Users/167011/Documents/MQF/Thesis/Deribit_Data/deribit_options_2025-01-30_100k_rows.parquet")
# Convert to Panadas DataFrame
df = table.to_pandas()

print(df.columns)

Index(['timestamp', 'state', 'index_price', 'instrument_name', 'last_price',
       'settlement_price', 'min_price', 'max_price', 'open_interest',
       'mark_price', 'best_ask_price', 'best_bid_price', 'interest_rate',
       'mark_iv', 'bid_iv', 'ask_iv', 'underlying_price', 'underlying_index',
       'best_ask_amount', 'best_bid_amount', 'estimated_delivery_price',
       'delivery_price', 'stats_high', 'stats_low', 'stats_price_change',
       'stats_volume', 'stats_volume_usd', 'greeks_delta', 'greeks_gamma',
       'greeks_rho', 'greeks_theta', 'greeks_vega', 'datetime'],
      dtype='object')


In [10]:
# Define log-price to ensure positive values
df['log_price'] = np.log(df['underlying_price'])
# Define log-return
df['log_return'] = df['log_price'].diff().fillna(0)

# Feature selection
drift_features = ['index_price', 'underlying_price', 'stats_price_change',
                  'open_interest', 'stats_volume', 'stats_volume_usd', 
                  'interest_rate', 'best_ask_price', 'best_bid_price', 
                  'best_ask_amount', 'best_bid_amount']

diffusion_features = ['mark_iv', 'bid_iv', 'ask_iv', 'greeks_vega', 'greeks_gamma',
                      'best_ask_price', 'best_bid_price']

# Define drift and diffusion coefficients (Parametrised by neural networks)
class NeuralDrift(nn.Module):
    def __init__(self, input_dim, hidden_dim=100):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1)  # Output is 1D drift
        )
    
    def forward(self, t, x):
        return self.net(x)

class NeuralDiffusion(nn.Module):
    def __init__(self, input_dim, hidden_dim=100):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1),
            nn.Softplus()  # Ensure non-negative volatility
        )

    def forward(self, t, x):
        return self.net(x)

# Instantiate networks
drift_net = NeuralDrift(len(drift_features))
diffusion_net = NeuralDiffusion(len(diffusion_features))

# Combine all features
features = list(set(drift_features + diffusion_features))

# Index mappings for Drift and Diffusion features
drift_idx = [features.index(f) for f in drift_features]
diff_idx = [features.index(f) for f in diffusion_features]

# Define Neural SDE
class NeuralSDE(torchsde.SDEIto):
    def __init__(self, drift_net, diffusion_net, drift_idx, diff_idx):
        super().__init__(noise_type="diagonal")
        self.drift_net = drift_net
        self.diffusion_net = diffusion_net
        self.drift_idx = drift_idx
        self.diff_idx = diff_idx
    
    def f(self, t, x):  # Drift function
        # Selecting drift features
        x_drift = x[:, self.drift_idx]
        drift_component = self.drift_net(t, x_drift)
        # Init full drift vector (with zeros)
        full_drift = torch.zeros_like(x)
        # Insert drift component into full drift vector
        full_drift[:, self.drift_idx] = drift_component
        return full_drift
    
    def g(self, t, x):  # Diffusion function
        # Selecting diffusion features
        x_diff = x[:, self.diff_idx]
        diffusion_component = self.diffusion_net(t, x_diff)
        # Init full diffusion vector (with zeros)
        full_diffusion = torch.zeros_like(x)
        # Insert diffusion component into full diffusion vector
        full_diffusion[:, self.diff_idx] = diffusion_component
        return full_diffusion

# Instantiate SDE model with the index lists
sde_model = NeuralSDE(drift_net, diffusion_net, drift_idx, diff_idx)

# Defining Wasserstein Loss
def wasserstein_loss(model, real_samples, batch_size=64):
    """Computes Wasserstein-1 distance loss"""
    # Generate synthetic samples
    num_steps = 100
    # t = torch.linspace(0, 1, steps=len(real_samples)).reshape(-1, 1) 
    t = torch.linspace(0, 1, steps=num_steps).reshape(-1, 1)
    # Generate batch of initial conditions
    x0 = real_samples[0].view(1, -1).repeat(batch_size, 1)
    # Generate synthetic trajectories
    generated_samples = torchsde.sdeint(model, x0, t)
    # Take the final time step for each trajectory
    generated_final = generated_samples[-1]
    # Sample a batch of real samples (shape: batch_size x state_dim)
    idx = torch.randperm(real_samples.shape[0])[:batch_size]
    real_batch = real_samples[idx]
    
    # Sort each column of the generated and real samples and compute the mean absolute difference
    real_sorted, _ = real_batch.sort(dim=0)
    gen_sorted, _ = generated_final.sort(dim=0)
    loss = torch.mean(torch.abs(real_sorted - gen_sorted))
    return loss


# Training the model
optimizer = optim.Adam(list(drift_net.parameters()) + list(diffusion_net.parameters()), lr=1e-3)

num_epochs = 1000
for epoch in range(num_epochs):
    real_samples = torch.tensor(df[features].values, dtype=torch.float32)
    
    optimizer.zero_grad()
    loss = wasserstein_loss(sde_model, real_samples, batch_size=64)
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f"Epoch {epoch}: Loss = {loss.item()}")





Epoch 0: Loss = nan
Epoch 100: Loss = nan


KeyboardInterrupt: 

In [9]:
torch.tensor(df[features].values, dtype=torch.float32)[0].view(1, -1)

tensor([[       nan, 0.0000e+00, 1.0380e+05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.2993e+02, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.6000e-03, 1.0382e+05, 0.0000e+00]])