In [None]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

# Seed the global torch RNG so that weight initialisation and DataLoader
# shuffling are repeatable across "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Prefer Apple's Metal backend when it is available, otherwise run on the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

MAX_INPUT_LEN = 10   # most recent items kept as model input per sample
PREDICT_LEN = 5      # number of future item offsets predicted per sample
BATCH_SIZE = 64      # samples per optimisation step
EPOCHS = 1000        # full passes over the training samples
EMB_DIM = 64         # item-embedding width (also the transformer d_model)
HIDDEN_DIM = 128     # width of the transformer feed-forward sub-layer
VOCAB_SIZE = 15298   # embedding rows; presumably max item_id + 1 -- verify against the data

Using device: mps


In [None]:
# Load the raw interaction log and give its columns fixed names
# (assumes the CSV has exactly three columns in this order -- TODO confirm).
df = pd.read_csv("./kaggle_data/interactions_train.csv").set_axis(
    ['user_id', 'item_id', 'timestamp'], axis=1
)

# Order every user's interactions chronologically ...
df_sorted = df.sort_values(['user_id', 'timestamp'])

# ... then collapse them into one item-id list per user.
grouped = (
    df_sorted
    .groupby('user_id')['item_id']
    .apply(list)
)

def is_step_one_user(seq, min_len=5, min_ratio=0.7):
    """Return True when a user's item sequence mostly advances by exactly +1.

    Args:
        seq: ordered list of numeric item ids for one user.
        min_len: sequences shorter than this are rejected outright.
        min_ratio: minimum fraction of consecutive pairs whose difference
            must equal 1 for the sequence to qualify.

    Returns:
        False for sequences shorter than ``min_len`` or with fewer than two
        items (there is no consecutive pair to evaluate); otherwise whether
        the share of +1 steps is at least ``min_ratio``.
    """
    # A sequence needs at least one consecutive pair; without this guard a
    # permissive ``min_len`` (<= 1) would divide by zero below.
    if len(seq) < min_len or len(seq) < 2:
        return False
    step_one_count = sum(nxt - cur == 1 for cur, nxt in zip(seq, seq[1:]))
    return step_one_count / (len(seq) - 1) >= min_ratio

# Keep only the users whose history looks like a "+1 step" reader.
filtered_users = [
    (uid, seq)
    for uid, seq in grouped.items()
    if is_step_one_user(seq)
]

# Slide over each kept history: the input is the (truncated) prefix ending at
# position `split - 1`, the target is the next PREDICT_LEN items expressed as
# offsets from the last input item.
train_samples = []
for _, items in filtered_users:
    for split in range(1, len(items) - PREDICT_LEN + 1):
        input_seq = items[:split][-MAX_INPUT_LEN:]
        base = input_seq[-1]
        target_offset = [nxt - base for nxt in items[split:split + PREDICT_LEN]]
        # Discard windows where any target lies behind the base item or more
        # than 100 ids ahead of it.
        if not all(1 <= offset <= 100 for offset in target_offset):
            continue
        train_samples.append((input_seq, target_offset))

class BookOffsetDataset(Dataset):
    """Map-style dataset over pre-built ``(input_ids, target_offsets)`` pairs."""

    def __init__(self, samples):
        """Store the list of ``(input_ids, target_offsets)`` samples as-is."""
        self.samples = samples

    def __len__(self):
        """Number of stored samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(LongTensor ids, FloatTensor offsets)``."""
        item_ids, offsets = self.samples[idx]
        inputs = torch.tensor(item_ids, dtype=torch.long)
        targets = torch.tensor(offsets, dtype=torch.float)
        return inputs, targets

def collate_fn(batch):
    """Merge ``(input, target)`` pairs into batched tensors.

    Inputs may differ in length, so they are padded at the end with 0 up to
    the longest sequence in the batch; targets are stacked along a new
    leading batch dimension.
    """
    inputs, targets = map(list, zip(*batch))
    padded_inputs = pad_sequence(inputs, batch_first=True, padding_value=0)
    return padded_inputs, torch.stack(targets)

# Shuffled mini-batches over the training samples; collate_fn pads the
# variable-length inputs inside each batch.
train_dataset = BookOffsetDataset(train_samples)
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
)

In [None]:
class TransformerPredictor(nn.Module):
    """Transformer encoder that regresses future item offsets from an item history.

    Args:
        vocab_size: number of rows in the item-embedding table; id 0 is
            reserved for padding (``padding_idx=0``).
        emb_dim: embedding width, also used as the encoder ``d_model``.
        hidden_dim: width of the encoder feed-forward sub-layer.
        output_len: number of offsets predicted per sequence.
        max_len: longest supported input sequence (size of the learned
            positional table).
    """

    def __init__(
        self, vocab_size, emb_dim, hidden_dim, output_len, max_len=MAX_INPUT_LEN
    ):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        # Learned positional table, broadcast over the batch dimension.
        self.positional_encoding = nn.Parameter(torch.randn(1, max_len, emb_dim))

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=emb_dim, nhead=4, dim_feedforward=hidden_dim
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)
        # The trailing ReLU keeps the predicted offsets non-negative.
        self.output_layer = nn.Sequential(
            nn.Linear(emb_dim, output_len), nn.ReLU()
        )

    def forward(self, x):
        """Predict ``output_len`` offsets for each row of ``x``.

        Args:
            x: LongTensor of item ids with shape (batch, seq_len), where 0
                marks padding.

        Returns:
            Tensor of shape (batch, output_len).
        """
        pad_mask = x == 0
        batch_size, seq_len = x.size(0), x.size(1)
        emb = self.embedding(x) + self.positional_encoding[:, :seq_len, :]
        # The encoder layers are built without batch_first, so they expect
        # (seq_len, batch, emb_dim).
        emb = emb.permute(1, 0, 2)
        encoded = self.encoder(emb, src_key_padding_mask=pad_mask)

        # Pool at the last *real* token of every sequence. Reading
        # ``encoded[-1]`` unconditionally would pick a padding position for
        # any sequence shorter than the longest one in the batch. For input
        # without padding this is exactly ``encoded[-1]``.
        positions = torch.arange(seq_len, device=x.device)
        last_real = (positions * (~pad_mask)).max(dim=1).values
        rows = torch.arange(batch_size, device=x.device)
        pooled = encoded[last_real, rows]
        return self.output_layer(pooled)

In [None]:
# Model, optimiser and regression loss for the offset-prediction task.
model = TransformerPredictor(VOCAB_SIZE, EMB_DIM, HIDDEN_DIM, PREDICT_LEN).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

for epoch in range(EPOCHS):
    model.train()
    batch_losses = []
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    # The reported value is the sum of per-batch losses, not their mean.
    total_loss = sum(batch_losses)
    print(f"[Epoch {epoch+1}] Loss: {total_loss:.4f}")

In [None]:
# Smoke-test the trained model on one hand-written history.
model.eval()

recent_items = [1350, 1350, 1350, 1350, 1350, 1351, 1351, 1350, 1350, 1350]
test_input = torch.tensor([recent_items], dtype=torch.long).to(device)

with torch.no_grad():
    offset_pred = model(test_input)
    # The model outputs offsets relative to the last item of the input, so
    # add that item id back to recover absolute ids.
    base = test_input[0][-1]
    pred_item_ids = torch.round(offset_pred + base)

print("Item_id：", pred_item_ids.cpu().tolist())


In [7]:
# Serialise the whole model object (not only its weights) to disk.
full_model_path = "transformer_model.pt"
torch.save(model, full_model_path)

In [9]:
# Save only the model's state_dict (parameters and buffers) as a checkpoint.
checkpoint_path = "transformer_checkpoint.pt"
torch.save(model.state_dict(), checkpoint_path)