In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import networkx as nx;
import matplotlib.pyplot as plt
from torch_geometric.data import Data
import torch
import torch_geometric
from torch_geometric.nn import GCNConv
from sklearn.metrics import f1_score, roc_auc_score
import torch.nn.functional as F
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns

### Data

In [10]:
# Load data
df_classes = pd.read_csv("elliptic_txs_classes.csv")
df_edgelist = pd.read_csv("elliptic_txs_edgelist.csv")
df_features = pd.read_csv("elliptic_txs_features.csv", header=None)

# Rename feature columns
df_features.columns = ['txId', 'timeStep'] + [f'localV{i}' for i in range(94)] + [f'aggregatedV{i}' for i in range(71)]

# Class mapping
class_mapping = {
    '2': 0,         # licit
    '1': 1,         # illicit
    'unknown': 2    # unknown
}
df_classes['class'] = df_classes['class'].map(class_mapping)

# txId mapping
txId_mapping = {txId: idx for idx, txId in enumerate(df_features['txId'].unique())}
df_classes['txId'] = df_classes['txId'].map(txId_mapping)
df_edgelist = df_edgelist.apply(lambda col: col.map(txId_mapping))
df_features['txId'] = df_features['txId'].map(txId_mapping)

# Merge features and classes
df = pd.merge(df_classes, df_features, on='txId')

# Filter out unknown classes
df = df[df['class'] != 2].reset_index(drop=True)

### Random Forest (RF)

In [38]:
# Group data by timeStep
time_steps = df['timeStep'].unique()
num_time_steps = len(time_steps)

# Determine split points for time steps
train_end = int(0.7 * num_time_steps)   # 70% for training

# Train test split based on time steps
train_time_steps = time_steps[:train_end]
test_time_steps = time_steps[train_end:]

train_mask = df['timeStep'].isin(train_time_steps)
test_mask = df['timeStep'].isin(test_time_steps)

train_df = df[train_mask].reset_index(drop=True)
test_df = df[test_mask].reset_index(drop=True)

In [41]:
train_x = train_df.drop(columns=['txId', 'timeStep', 'class']).values
train_y = train_df['class'].values
test_x = test_df.drop(columns=['txId', 'timeStep', 'class']).values
test_y = test_df['class'].values

# TimeSeriesSplit for temporal CV
tscv = TimeSeriesSplit(n_splits=5)

# Define hyperparameter grid
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
}

# Set up GridSearchCV
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=tscv,
    scoring='f1',  # You can also try 'f1_macro' or 'f1_weighted'
    n_jobs=-1,     # Use all cores
    verbose=2
)

# Run grid search on train_x
grid_search.fit(train_x, train_y)

# Best estimator and params
print("Best Hyperparameters:")
print(grid_search.best_params_)

best_rf = grid_search.best_estimator_

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=   0.9s
[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time=   1.8s
[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=   2.4s
[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time=   0.9s
[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=   3.5s
[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time=   4.6s
[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=   5.0s
[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time=   2.2s
[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=   6.4s
[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time=   6.9s
[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time=   3.5s
[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time=   1.9s
[CV] END max_depth=10, min_s

In [42]:
test_preds = best_rf.predict(test_x)
print("=== Final Evaluation on Test Set (Best Model) ===")
print(classification_report(test_y, test_preds, target_names=["licit", "illicit"]))

=== Final Evaluation on Test Set (Best Model) ===
              precision    recall  f1-score   support

       licit       0.98      1.00      0.99     15587
     illicit       0.97      0.72      0.83      1083

    accuracy                           0.98     16670
   macro avg       0.97      0.86      0.91     16670
weighted avg       0.98      0.98      0.98     16670



### Graph Convolution Network (GCN)

In [43]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

data = Data(
    x=torch.tensor(df_features.drop(columns=['txId', 'timeStep']).values, dtype=torch.float32),
    edge_index=torch.tensor(df_edgelist.values.T, dtype=torch.long),
    y=torch.tensor(df_classes['class'].values, dtype=torch.long)
)

data = data.to(device)
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of features: {data.num_features}")
print(f"Number of edges: {data.num_edges}")
print(f"Class distribution: {data.y.bincount()}")

# Filter out unknown classes (class 2)
known_mask = (data.y != 2)
filtered_x = data.x[known_mask]
filtered_y = data.y[known_mask]
filtered_features = df_features.iloc[known_mask.cpu().numpy()]

# Reset the index of filtered_features to align with filtered_y
filtered_features = filtered_features.reset_index(drop=True)

# Group data by timeStep
time_steps = filtered_features['timeStep'].unique()
num_time_steps = len(time_steps)

# Determine split points for time steps
train_end = int(0.7 * num_time_steps)  # 70% for training
val_end = int(0.8 * num_time_steps)   # 10% for validation (70% + 10%)

# Split time steps into train, validation, and test sets
train_time_steps = time_steps[:train_end]
val_time_steps = time_steps[train_end:val_end]
test_time_steps = time_steps[val_end:]

# Create masks based on time steps
train_mask = torch.tensor(
    filtered_features[filtered_features['timeStep'].isin(train_time_steps)].index.values, dtype=torch.long
)
val_mask = torch.tensor(
    filtered_features[filtered_features['timeStep'].isin(val_time_steps)].index.values, dtype=torch.long
)
test_mask = torch.tensor(
    filtered_features[filtered_features['timeStep'].isin(test_time_steps)].index.values, dtype=torch.long
)

# Extract train, validation, and test labels
train_y = filtered_y[train_mask]
val_y = filtered_y[val_mask]
test_y = filtered_y[test_mask]

# Print class distributions
print(f"Train class distribution: {train_y.bincount()}")
print(f"Validation class distribution: {val_y.bincount()}")
print(f"Test class distribution: {test_y.bincount()}")

Number of nodes: 203769
Number of features: 165
Number of edges: 234355
Class distribution: tensor([ 42019,   4545, 157205])
Train class distribution: tensor([26432,  3462])
Validation class distribution: tensor([5039,  447])
Test class distribution: tensor([10548,   636])


In [44]:
# Define GCN

class GCN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return x
    
# Initialize the model
in_channels = data.num_features
out_channels = len(class_mapping) - 1  # Exclude unknown class
model = GCN(in_channels, out_channels).to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), 
                             lr=0.01, 
                             weight_decay=0.0005)
criterion = torch.nn.CrossEntropyLoss()

data = data.to(device)

# Training loop
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out[train_mask], train_y)
    loss.backward()
    optimizer.step()
    return loss.item()

# Evaluation function with F1-score and AUC
def evaluate(mask, labels):
    model.eval()
    with torch.no_grad():
        out = model(data)
        logits = out[mask]  # Get logits for the masked nodes
        pred = logits.argmax(dim=1)  # Predicted class
        prob = F.softmax(logits, dim=1)[:, 1]  # Probability for the positive class (for AUC)

        # Accuracy
        correct = (pred == labels).sum().item()
        acc = correct / mask.size(0)

        # F1-score
        f1 = f1_score(labels.cpu().numpy(), pred.cpu().numpy(), average="weighted")

        # AUC (only if there are at least two classes in the mask)
        try:
            auc = roc_auc_score(labels.cpu().numpy(), prob.cpu().numpy())
        except ValueError:
            auc = float('nan')  # AUC is undefined if only one class is present

    return acc, f1, auc

# Training with validation
num_epochs = 100
for epoch in range(num_epochs):
    loss = train()
    train_acc, train_f1, train_auc = evaluate(train_mask, train_y)
    val_acc, val_f1, val_auc = evaluate(val_mask, val_y)
    print(f"Epoch {epoch+1}, Loss: {loss:.4f}, "
          f"Train Acc: {train_acc:.4f}, Train F1: {train_f1:.4f}, Train AUC: {train_auc:.4f}, "
          f"Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}, Val AUC: {val_auc:.4f}")

# Test the model
test_acc, test_f1, test_auc = evaluate(test_mask, test_y)
print(f"Test Accuracy: {test_acc:.4f}, Test F1: {test_f1:.4f}, Test AUC: {test_auc:.4f}")

GCN(
  (conv1): GCNConv(165, 16)
  (conv2): GCNConv(16, 2)
)
Epoch 1, Loss: 0.6365, Train Acc: 0.8588, Train F1: 0.8217, Train AUC: 0.4963, Val Acc: 0.8956, Val F1: 0.8713, Val AUC: 0.5155
Epoch 2, Loss: 0.4464, Train Acc: 0.8764, Train F1: 0.8275, Train AUC: 0.5103, Val Acc: 0.9100, Val F1: 0.8768, Val AUC: 0.5177
Epoch 3, Loss: 0.3996, Train Acc: 0.8821, Train F1: 0.8293, Train AUC: 0.5235, Val Acc: 0.9141, Val F1: 0.8780, Val AUC: 0.5113
Epoch 4, Loss: 0.3997, Train Acc: 0.8830, Train F1: 0.8295, Train AUC: 0.5327, Val Acc: 0.9149, Val F1: 0.8784, Val AUC: 0.5020
Epoch 5, Loss: 0.4091, Train Acc: 0.8782, Train F1: 0.8274, Train AUC: 0.5300, Val Acc: 0.9143, Val F1: 0.8781, Val AUC: 0.4930
Epoch 6, Loss: 0.4207, Train Acc: 0.8828, Train F1: 0.8297, Train AUC: 0.5449, Val Acc: 0.9154, Val F1: 0.8783, Val AUC: 0.4811
Epoch 7, Loss: 0.4124, Train Acc: 0.8831, Train F1: 0.8298, Train AUC: 0.5497, Val Acc: 0.9154, Val F1: 0.8783, Val AUC: 0.4713
Epoch 8, Loss: 0.4050, Train Acc: 0.8837, T