# Full Multi-Asset GNN Notebook (GCLSTM Version)
Complete pipeline for training a temporal GNN on ADA/BTC/ETH LOB data.

In [24]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from pathlib import Path
from torch_geometric_temporal.signal import DynamicGraphTemporalSignal, temporal_signal_split
from torch_geometric_temporal.nn.recurrent import GCLSTM
import matplotlib.pyplot as plt
import seaborn as sns

print('Imports loaded.')

Imports loaded.


## Feature Engineering

In [25]:
def add_features(df):
    df = df.copy()

    df['return_1'] = np.log(df['midpoint']).diff()
    df['return_2'] = np.log(df['midpoint']).diff(2)
    df['vol_3'] = df['return_1'].rolling(3).std()
    df['vol_6'] = df['return_1'].rolling(6).std()
    df['mid_slope'] = df['midpoint'].diff()

    df['of_imbalance'] = (df['buys'] - df['sells']) / (df['buys'] + df['sells'] + 1e-6)

    df['bid_liq_near'] = df[[f"bids_notional_{i}" for i in range(3)]].sum(axis=1)
    df['ask_liq_near'] = df[[f"asks_notional_{i}" for i in range(3)]].sum(axis=1)
    df['bid_liq_mid'] = df[[f"bids_notional_{i}" for i in range(3, 8)]].sum(axis=1)
    df['ask_liq_mid'] = df[[f"asks_notional_{i}" for i in range(3, 8)]].sum(axis=1)

    df['lob_imbalance'] = (df['bid_liq_near'] - df['ask_liq_near']) / (df['bid_liq_near'] + df['ask_liq_near'] + 1e-6)

    df['microprice'] = (
        df['ask_liq_near'] * (df['midpoint'] + df['spread'] / 2) +
        df['bid_liq_near'] * (df['midpoint'] - df['spread'] / 2)
    ) / (df['bid_liq_near'] + df['ask_liq_near'] + 1e-6)

    df['microprice_delta'] = df['microprice'].diff()

    keep = [
        'midpoint','spread','return_1','return_2','vol_3','vol_6','mid_slope',
        'buys','sells','of_imbalance',
        'bids_distance_0','bids_distance_1','bids_distance_2',
        'asks_distance_0','asks_distance_1','asks_distance_2',
        'bid_liq_near','ask_liq_near','bid_liq_mid','ask_liq_mid',
        'lob_imbalance','microprice','microprice_delta'
    ]

    return df[keep].dropna()

## Graph Construction

In [26]:
def build_temporal_graph(df_A, df_B, df_C, window=24):
    A = add_features(df_A)
    B = add_features(df_B)
    C = add_features(df_C)

    merged = A.join(B, rsuffix='_BTC').join(C, rsuffix='_ETH').dropna()

    midpoint = merged['midpoint']
    y = (midpoint.shift(-1) > midpoint).astype(int).dropna()

    merged = merged.iloc[:-1]

    X, Y, EI = [], [], []

    edge_index = np.array([[0,1,0,2,1,2],[1,0,2,0,2,1]])

    for i in range(len(merged)-window):
        block = merged.iloc[i:i+window]
        X_t = []
        for t in range(window):
            row = block.iloc[t]

            ada = row[[c for c in block.columns if not (c.endswith('_BTC') or c.endswith('_ETH'))]].values
            btc = row[[c for c in block.columns if c.endswith('_BTC')]].values
            eth = row[[c for c in block.columns if c.endswith('_ETH')]].values

            X_t.append(np.vstack([ada, btc, eth]))

        X.append(np.array(X_t))
        EI.append(edge_index)
        Y.append(y.iloc[i+window-1])

    return DynamicGraphTemporalSignal(EI, None, X, np.array(Y))

## GCLSTM Model

In [27]:
class PriceDirectionGCLSTM(nn.Module):
    def __init__(self, in_channels, hidden_channels):
        super().__init__()
        self.gclstm = GCLSTM(in_channels, hidden_channels)
        self.fc = nn.Linear(hidden_channels * 3, 2)

    def forward(self, x, edge_index):
        h = None
        for t in range(x.shape[0]):
            h, _ = self.gclstm(x[t], edge_index, H=h)
        return self.fc(h.reshape(-1))

## Training Loop

In [37]:
def train_model(model, train_dataset, epochs=5):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        losses = []
        for snapshot in train_dataset:
            x = torch.tensor(snapshot.x, dtype=torch.float)
            edge_idx = torch.tensor(snapshot.edge_index, dtype=torch.long)
            y = torch.tensor([snapshot.y], dtype=torch.long)

            pred = model(x, edge_idx)
            loss = criterion(pred.unsqueeze(0), y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

        print(f'Epoch {epoch}: loss={np.mean(losses):.4f}')

Notebook ready. Load your CSV files and start training.

In [6]:
import pandas as pd
from pathlib import Path
def load_asset(asset):
    freq = '5min'
    DATA_DIR = Path('../dataset')
    path = DATA_DIR / f"{asset}_{freq}.csv"

    df = pd.read_csv(path)
    df['timestamp'] = pd.to_datetime(df["system_time"])
    df['timestamp'] = df['timestamp'].dt.round('min')
    df = df.sort_values('timestamp').set_index('timestamp')
    return df[['midpoint','bids_notional_0']]  
    
df_ADA = load_asset("ADA").rename(columns={'midpoint': 'ADA', 'bids_notional_0': 'vol_ADA'})
df_BTC = load_asset("BTC").rename(columns={'midpoint': 'BTC', 'bids_notional_0': 'vol_BTC'})
df_ETH = load_asset("ETH").rename(columns={'midpoint': 'ETH', 'bids_notional_0': 'vol_ETH'})
df = df_ADA.join(df_BTC).join(df_ETH)
print(df.shape)
df.head()


(3367, 6)


Unnamed: 0_level_0,ADA,vol_ADA,BTC,vol_BTC,ETH,vol_ETH
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-07 11:38:00+00:00,1.17255,2461.409912,55899.995,18997.929688,1970.965,1032.089966
2021-04-07 11:43:00+00:00,1.1839,5481.839844,56111.655,8416.75,1975.115,3847.350098
2021-04-07 11:48:00+00:00,1.1783,383.899994,56100.015,2.7,1969.295,7542.339844
2021-04-07 11:53:00+00:00,1.1815,2243.899902,56163.445,1703.689941,1971.535,2136.570068
2021-04-07 11:58:00+00:00,1.1933,2266.129883,56229.995,1438.400024,1974.995,75441.078125


In [30]:
df_feat = df.copy()
df_feat['ADA_ret'] = df.ADA.pct_change()
df_feat['BTC_ret'] = df.BTC.pct_change()
df_feat['ETH_ret'] = df.ETH.pct_change()

df_feat = df_feat.dropna()


  df_feat['BTC_ret'] = df.BTC.pct_change()
  df_feat['ETH_ret'] = df.ETH.pct_change()


In [31]:
df_feat.head()

Unnamed: 0_level_0,ADA,BTC,ETH,ADA_ret,BTC_ret,ETH_ret
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-07 11:43:00+00:00,1.1839,56111.655,1975.115,0.00968,0.003786,0.002106
2021-04-07 11:48:00+00:00,1.1783,56100.015,1969.295,-0.00473,-0.000207,-0.002947
2021-04-07 11:53:00+00:00,1.1815,56163.445,1971.535,0.002716,0.001131,0.001137
2021-04-07 11:58:00+00:00,1.1933,56229.995,1974.995,0.009987,0.001185,0.001755
2021-04-07 12:03:00+00:00,1.18365,56119.455,1962.77,-0.008087,-0.001966,-0.00619


In [32]:
edge_index = torch.tensor([
    [0, 0, 1, 1, 2, 2],
    [1, 2, 0, 2, 0, 1]
], dtype=torch.long)


In [33]:
class MultiAssetTemporalDataset:
    def __init__(self, df, window=20):
        self.df = df
        self.window = window
        self.assets = ['ADA', 'BTC', 'ETH']
        self.features = ['ADA', 'BTC', 'ETH', 'ADA_ret', 'BTC_ret', 'ETH_ret']

    def __len__(self):
        return len(self.df) - self.window - 1

    def __getitem__(self, idx):
        window_df = self.df.iloc[idx : idx + self.window]
        next_df = self.df.iloc[idx + self.window]

        # X: [T, N, F]
        X = []
        for t in range(self.window):
            row = window_df.iloc[t]
            X.append([
                [row.ADA, row.ADA_ret],
                [row.BTC, row.BTC_ret],
                [row.ETH, row.ETH_ret]
            ])
        X = torch.tensor(X, dtype=torch.float)

        # y: direction (classification)
        y = torch.tensor([
            int(next_df.ADA_ret > 0),
            int(next_df.BTC_ret > 0),
            int(next_df.ETH_ret > 0)
        ], dtype=torch.long)

        return X, edge_index, y


In [34]:
dataset = MultiAssetTemporalDataset(df_feat, window=20)

train_data = []
for i in range(len(dataset)):
    X, edge_idx, y = dataset[i]
    train_data.append({
        "x": X,
        "edge_index": edge_idx,
        "y": y
    })


In [35]:
class WrapperDataset(torch.utils.data.Dataset):
    def __init__(self, raw):
        self.raw = raw
    def __len__(self): return len(self.raw)
    def __getitem__(self, i):
        d = self.raw[i]
        return d["x"], d["edge_index"], d["y"]

loader = torch.utils.data.DataLoader(WrapperDataset(train_data), batch_size=1)


In [None]:
def train_model2(model, loader, epochs=5):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        losses = []
        for x, edge_idx, y in loader:
            x = x.squeeze(0)    # [T, N, F]
            pred = model(x, edge_idx)
            
            loss = criterion(pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.append(loss.item())
            
        print(f"Epoch {epoch}: loss={np.mean(losses):.4f}")


In [None]:
dataset = (df_ADA, df_BTC, df_ETH, window=24)

model = PriceDirectionGCLSTM(in_channels=2, hidden_channels=32)  # in_channels = число фичей на узел
train_model(model, dataset, epochs=10)


KeyError: 'midpoint'

In [47]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from pathlib import Path
from torch.utils.data import DataLoader, Dataset
from torch_geometric_temporal.nn.recurrent import GCLSTM

# -----------------------------
# Data loading
# -----------------------------
def load_asset(asset):
    freq = '5min'
    DATA_DIR = Path('../dataset')
    path = DATA_DIR / f"{asset}_{freq}.csv"
    df = pd.read_csv(path)
    df['timestamp'] = pd.to_datetime(df["system_time"])
    df['timestamp'] = df['timestamp'].dt.round('min')
    df = df.sort_values('timestamp').set_index('timestamp')
    return df[['midpoint']].rename(columns={'midpoint': asset})

df_ADA = load_asset("ADA")
df_BTC = load_asset("BTC")
df_ETH = load_asset("ETH")

df = df_ADA.join(df_BTC).join(df_ETH)

# -----------------------------
# Feature engineering
# -----------------------------
df_feat = df.copy()
for asset in ['ADA', 'BTC', 'ETH']:
    df_feat[f'{asset}_ret'] = df_feat[asset].pct_change(fill_method=None).fillna(0)

df_feat = df_feat.fillna(0)

# -----------------------------
# Fully connected graph (3 nodes)
# -----------------------------
edge_index = torch.tensor([
    [0,0,1,1,2,2],
    [1,2,0,2,0,1]
], dtype=torch.long)

# -----------------------------
# Temporal dataset
# -----------------------------
class MultiAssetTemporalDataset:
    def __init__(self, df, window=20):
        self.df = df
        self.window = window

    def __len__(self):
        return max(0, len(self.df) - self.window - 1)

    def __getitem__(self, idx):
        block = self.df.iloc[idx: idx + self.window]
        next_row = self.df.iloc[idx + self.window]

        X = []
        for t in range(self.window):
            r = block.iloc[t]
            X.append([
                [r.ADA, r.ADA_ret],
                [r.BTC, r.BTC_ret],
                [r.ETH, r.ETH_ret]
            ])

        X = torch.tensor(X, dtype=torch.float)

        y = torch.tensor([
            int(next_row.ADA_ret > 0),
            int(next_row.BTC_ret > 0),
            int(next_row.ETH_ret > 0)
        ], dtype=torch.long)

        return X, edge_index, y

dataset = MultiAssetTemporalDataset(df_feat, window=20)

# -----------------------------
# PyTorch wrapper
# -----------------------------
class WrapperDataset(Dataset):
    def __init__(self, raw):
        self.raw = raw
    def __len__(self):
        return len(self.raw)
    def __getitem__(self, i):
        return self.raw[i]

loader = DataLoader(WrapperDataset(dataset), batch_size=1, shuffle=True)

# -----------------------------
# GCLSTM model
# -----------------------------
class PriceDirectionGCLSTM(nn.Module):
    def __init__(self, in_channels, hidden_channels=32, K=2):
        super().__init__()
        self.gclstm = GCLSTM(in_channels, hidden_channels, K)
        self.fc = nn.Linear(hidden_channels, 2)

    def forward(self, x, edge_index):
        h = None
        c = None
        T = x.shape[0]

        for t in range(T):
            node_features = x[t]               # [3, F]
            h, c = self.gclstm(node_features, edge_index, H=h, C=c)

        out = self.fc(h)                       # h: [3, hidden] -> out: [3, 2]
        return out


# -----------------------------
# Training loop
# -----------------------------
def train_model(model, loader, epochs=5):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        losses = []
        for x, e_idx, y in loader:
            x = x.squeeze(0)
            pred = model(x, e_idx[0])      # pred: [3, 2]
            loss = criterion(pred, y[0])   # y[0]: [3]
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        print(f"Epoch {epoch} | Loss: {np.mean(losses):.4f}")


# -----------------------------
# Run
# -----------------------------
X, EI, y = dataset[0]
print("X shape:", X.shape)
print("Edge index shape:", EI.shape)
print("y shape:", y.shape)

model = PriceDirectionGCLSTM(in_channels=2)
train_model(model, loader, epochs=10)


X shape: torch.Size([20, 3, 2])
Edge index shape: torch.Size([2, 6])
y shape: torch.Size([3])
Epoch 0 | Loss: 0.6947
Epoch 1 | Loss: 0.6940
Epoch 2 | Loss: 0.6936
Epoch 3 | Loss: 0.6936
Epoch 4 | Loss: 0.6936
Epoch 5 | Loss: 0.6935
Epoch 6 | Loss: 0.6934
Epoch 7 | Loss: 0.6934
Epoch 8 | Loss: 0.6931
Epoch 9 | Loss: 0.6937


In [43]:
x = torch.rand(5,3,2)
edge_index = torch.tensor([
    [0,0,1,1,2,2],
    [1,2,0,2,0,1]
], dtype=torch.long)

model = GCLSTM(in_channels=2, out_channels=4, K=2)

h = None
for t in range(x.shape[0]):
    h, _ = model(x[t], edge_index, H=h)
print(h.shape)  # [3,4]

torch.Size([3, 4])
