In [65]:
from collections import defaultdict
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# data sorting and splitting

In [57]:
df = pd.read_csv("./data/events.csv")
event2id = {"view": 0, "addtocart": 1, "transaction": 2}
df["event"] = df["event"].map(event2id)
df = df.sort_values(["visitorid", "timestamp"])

# Group events by user
user_events = defaultdict(list)
for row in df.itertuples():
    user_events[row.visitorid].append((row.timestamp, row.event, row.itemid))

In [60]:
# Split users into train/val/test
user_ids = list(user_events.keys())
np.random.seed(42)
np.random.shuffle(user_ids)
n_total = len(user_ids)
train_users = user_ids[:int(0.7 * n_total)]
val_users = user_ids[int(0.7 * n_total):int(0.85 * n_total)]
test_users = user_ids[int(0.85 * n_total):]

In [61]:
len(train_users)

985305

# padding and truncating of user event sequence

In [62]:
# Build sequences
def build_sequences(user_ids, user_sequences, seq_len=50):
    X, y = [], []
    for uid in user_ids:
        events = user_sequences[uid]
        for i in range(1, len(events)):
            seq = events[max(0, i - seq_len):i]
            pad_len = seq_len - len(seq)
            if pad_len > 0:
                seq = [(0, 0, 0)] * pad_len + seq
            timestamps = [e[0] for e in seq]
            event_types = [e[1] for e in seq]
            item_ids = [e[2] for e in seq]
            time_diffs = [0] + [timestamps[j] - timestamps[j-1] for j in range(1, len(timestamps))]
            features = list(zip(event_types, time_diffs, item_ids))
            X.append(features)
            y.append(events[i][1])
    return X, y

X_train, y_train = build_sequences(train_users, user_events)
X_val, y_val = build_sequences(val_users, user_events)
X_test, y_test = build_sequences(test_users, user_events)

# Build vocab for item ids
all_item_ids = {itemid for seq in X_train + X_val + X_test for (_, _, itemid) in seq}
item2id = {item: idx + 1 for idx, item in enumerate(sorted(all_item_ids))}
item2id[0] = 0
num_item_ids = len(item2id)


# Dataset and Dataloader definition

In [63]:
# PyTorch Dataset
class EventSequenceDataset(Dataset):
    def __init__(self, X, y, item2id):
        self.X = X
        self.y = y
        self.item2id = item2id

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        event_types = torch.tensor([e[0] for e in self.X[idx]], dtype=torch.long)
        time_diffs = torch.tensor([e[1] for e in self.X[idx]], dtype=torch.float32)
        item_ids = torch.tensor([self.item2id.get(e[2], 0) for e in self.X[idx]], dtype=torch.long)
        label = torch.tensor(self.y[idx], dtype=torch.long)
        return event_types, time_diffs, item_ids, label

train_dataset = EventSequenceDataset(X_train, y_train, item2id)
val_dataset = EventSequenceDataset(X_val, y_val, item2id)
test_dataset = EventSequenceDataset(X_test, y_test, item2id)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128)
test_loader = DataLoader(test_dataset, batch_size=128)

# Model

In [73]:
class BehaviorRNN(nn.Module):
    def __init__(self, event_vocab_size, item_vocab_size, embed_dim=32, hidden_dim=64):
        super().__init__()
        self.event_embedding = nn.Embedding(event_vocab_size, embed_dim)
        self.item_embedding = nn.Embedding(item_vocab_size, embed_dim)
        self.time_embedding = nn.Linear(1, embed_dim)

        self.rnn = nn.GRU(embed_dim * 3, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 3)

    def forward(self, event_types, time_diffs, item_ids):
        e_embed = self.event_embedding(event_types)
        i_embed = self.item_embedding(item_ids)
        t_embed = self.time_embedding(time_diffs.unsqueeze(-1))

        x = torch.cat([e_embed, t_embed, i_embed], dim=-1)
        out, _ = self.rnn(x)
        out = out[:, -1, :]  # use the last time step
        logits = self.fc(out)
        return logits

# Instantiate model
model = BehaviorRNN(event_vocab_size=3, item_vocab_size=num_item_ids + 1)
model

BehaviorRNN(
  (event_embedding): Embedding(3, 32)
  (item_embedding): Embedding(150789, 32)
  (time_embedding): Linear(in_features=1, out_features=32, bias=True)
  (rnn): GRU(96, 64, batch_first=True)
  (fc): Linear(in_features=64, out_features=3, bias=True)
)

# Train

In [74]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 5

# to handle imbalance class problems
train_counts = np.array([893279, 45230, 16268])
class_weights = 1. / train_counts
class_weights = class_weights / class_weights.sum()
class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)

def train_one_epoch(model, loader, optimizer):
    model.train()
    total_loss = 0
    for event_types, time_diffs, item_ids, labels in loader:
        event_types = event_types.to(device)
        time_diffs = time_diffs.to(device)
        item_ids = item_ids.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(event_types, time_diffs, item_ids)
        # loss = F.cross_entropy(logits, labels)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * event_types.size(0)

    avg_loss = total_loss / len(loader.dataset)
    return avg_loss

def evaluate(model, loader):
    model.eval()
    total_loss = 0
    correct = 0
    with torch.no_grad():
        for event_types, time_diffs, item_ids, labels in loader:
            event_types = event_types.to(device)
            time_diffs = time_diffs.to(device)
            item_ids = item_ids.to(device)
            labels = labels.to(device)

            logits = model(event_types, time_diffs, item_ids)
            # loss = F.cross_entropy(logits, labels)
            loss = criterion(logits, labels)
            total_loss += loss.item() * event_types.size(0)

            preds = logits.argmax(dim=1)
            correct += (preds == labels).sum().item()

    avg_loss = total_loss / len(loader.dataset)
    accuracy = correct / len(loader.dataset)
    return avg_loss, accuracy

In [75]:
import copy

patience = 3
best_val_loss = float('inf')
best_model_wts = None
epochs_no_improve = 0

for epoch in range(1, num_epochs + 1):
    train_loss = train_one_epoch(model, train_loader, optimizer)
    val_loss, val_acc = evaluate(model, val_loader)

    print(f"Epoch {epoch}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, Val Acc={val_acc:.4f}")

    # Check if validation loss improved
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping triggered after {epoch} epochs.")
            break

# Load best model weights before testing
model.load_state_dict(best_model_wts)

# Test evaluation
test_loss, test_acc = evaluate(model, test_loader)
print(f"Test Loss={test_loss:.4f}, Test Acc={test_acc:.4f}")


Epoch 1: Train Loss=1.0608, Val Loss=1.0552, Val Acc=0.8491
Epoch 2: Train Loss=1.0531, Val Loss=1.0697, Val Acc=0.7526
Epoch 3: Train Loss=1.0499, Val Loss=1.0947, Val Acc=0.8412
Epoch 4: Train Loss=1.0462, Val Loss=1.0735, Val Acc=0.8569
Early stopping triggered after 4 epochs.
Test Loss=1.0558, Test Acc=0.8598


# test the model with real data

In [76]:
def predict_next_event(model, user_events_seq, item2id, seq_len=50):
    """
    Predict next event for a given user's event sequence.
    
    Args:
        model: trained BehaviorRNN model
        user_events_seq: list of tuples (timestamp, event_type_str, item_id)
            e.g. [(timestamp1, "view", item1), (timestamp2, "addtocart", item2), ...]
        item2id: dict mapping item IDs to integer indices (same as training)
        seq_len: length of sequence to feed model

    Returns:
        predicted_event: string label of predicted next event ("view", "addtocart", "transaction")
        predicted_probs: softmax probabilities as numpy array
    """
    # Map event string to int
    event2id = {"view": 0, "addtocart": 1, "transaction": 2}
    
    # Convert raw events to training format: (timestamp, event_id, item_id)
    processed_seq = [(ts, event2id.get(ev, 0), item) for ts, ev, item in user_events_seq]

    # Use only last seq_len events, pad if needed
    seq = processed_seq[-seq_len:]
    pad_len = seq_len - len(seq)
    if pad_len > 0:
        seq = [(0, 0, 0)] * pad_len + seq
    
    timestamps = [e[0] for e in seq]
    event_types = [e[1] for e in seq]
    item_ids = [e[2] for e in seq]
    time_diffs = [0] + [timestamps[j] - timestamps[j-1] for j in range(1, len(timestamps))]
    
    # Map item_ids using item2id vocab, default to 0 if unseen
    item_ids_mapped = [item2id.get(iid, 0) for iid in item_ids]

    # Create tensors
    event_types_t = torch.tensor([event_types], dtype=torch.long).to(device)  # batch_size=1
    time_diffs_t = torch.tensor([time_diffs], dtype=torch.float32).to(device)
    item_ids_t = torch.tensor([item_ids_mapped], dtype=torch.long).to(device)

    model.eval()
    with torch.no_grad():
        logits = model(event_types_t, time_diffs_t, item_ids_t)
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
        pred_id = np.argmax(probs)

    id2event = {v: k for k, v in event2id.items()}
    predicted_event = id2event[pred_id]

    return predicted_event, probs


In [104]:
# Example new user event sequence
new_user_seq = [
    (600000, "view", 601),               # Initial product view
    (600030, "view", 602),               # Related product
    (600060, "view", 601),               # Back to original product
    (600090, "addtocart", 601),          # Adds it to cart
    (600120, "view", 603),               # Still exploring alternatives
    (600150, "view", 601), 
    (600180, "addtocart", 603),  
]

pred_event, pred_probs = predict_next_event(model, new_user_seq, item2id)
print("Predicted next event:", pred_event)
print("Probabilities:", pred_probs)


Predicted next event: view
Probabilities: [0.44539365 0.3790203  0.17558601]


# save the model locally and transferred to ONNX

In [95]:
torch.save(model, "behavior_rnn_full.pth")

In [None]:
# load the model with the following code
# model = torch.load("behavior_rnn_full.pth")
# model.to(device)
# model.eval()


In [106]:
# !NOTE eval mode
model.eval()

# Create dummy inputs with the correct shape (batch_size=1, seq_len=50)
dummy_event_types = torch.zeros((1, 50), dtype=torch.long).to(device)
dummy_time_diffs = torch.zeros((1, 50), dtype=torch.float32).to(device)
dummy_item_ids = torch.zeros((1, 50), dtype=torch.long).to(device)

# Export to ONNX
torch.onnx.export(
    model,
    (dummy_event_types, dummy_time_diffs, dummy_item_ids),
    "behavior_rnn.onnx",
    input_names=["event_types", "time_diffs", "item_ids"],
    output_names=["logits"],
    dynamic_axes={
        "event_types": {0: "batch_size", 1: "seq_len"},
        "time_diffs": {0: "batch_size", 1: "seq_len"},
        "item_ids": {0: "batch_size", 1: "seq_len"},
        "logits": {0: "batch_size"}
    },
    opset_version=14,
)


In [109]:
import onnx

model = onnx.load("behavior_rnn.onnx")
print("Inputs:")
for i in model.graph.input:
    print(f" - {i.name}, shape: {[dim.dim_value for dim in i.type.tensor_type.shape.dim]}")

print("Outputs:")
for o in model.graph.output:
    print(f" - {o.name}, shape: {[dim.dim_value for dim in o.type.tensor_type.shape.dim]}")


Inputs:
 - event_types, shape: [0, 0]
 - time_diffs, shape: [0, 0]
 - item_ids, shape: [0, 0]
Outputs:
 - logits, shape: [0, 3]


In [110]:
import onnx
onnx_model = onnx.load("behavior_rnn.onnx")
onnx.checker.check_model(onnx_model)
print(onnx.helper.printable_graph(onnx_model.graph))


graph main_graph (
  %event_types[INT64, batch_sizexseq_len]
  %time_diffs[FLOAT, batch_sizexseq_len]
  %item_ids[INT64, batch_sizexseq_len]
) initializers (
  %event_embedding.weight[FLOAT, 3x32]
  %item_embedding.weight[FLOAT, 150789x32]
  %time_embedding.bias[FLOAT, 32]
  %fc.weight[FLOAT, 3x64]
  %fc.bias[FLOAT, 3]
  %onnx::MatMul_103[FLOAT, 1x32]
  %onnx::GRU_123[FLOAT, 1x192x96]
  %onnx::GRU_124[FLOAT, 1x192x64]
  %onnx::GRU_125[FLOAT, 1x384]
) {
  %/event_embedding/Constant_output_0 = Constant[value = <Scalar Tensor []>]()
  %/event_embedding/Gather_output_0 = Gather(%event_embedding.weight, %event_types)
  %/item_embedding/Gather_output_0 = Gather(%item_embedding.weight, %item_ids)
  %/Constant_output_0 = Constant[value = <Tensor>]()
  %/Unsqueeze_output_0 = Unsqueeze(%time_diffs, %/Constant_output_0)
  %/time_embedding/MatMul_output_0 = MatMul(%/Unsqueeze_output_0, %onnx::MatMul_103)
  %/time_embedding/Add_output_0 = Add(%time_embedding.bias, %/time_embedding/MatMul_output_0)


In [None]:
import torch
import torch.nn as nn
import torch.onnx

# Simple model that adds two numbers
class AddModel(nn.Module):
    def forward(self, x):
        return x.sum(dim=1, keepdim=True)  # Sum across columns

# Instantiate model
model = AddModel()
model.eval()

# Example input: batch of size 1 with 2 features
dummy_input = torch.tensor([[3.0, 5.0]])

# Export to ONNX
torch.onnx.export(
    model, 
    dummy_input, 
    "add_model.onnx", 
    input_names=["input"], 
    output_names=["sum"],
    dynamic_axes={"input": {0: "batch_size"}, "sum": {0: "batch_size"}},
    opset_version=11
)

print("✅ Model exported as 'add_model.onnx'")
