In [2]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.utils.rnn import pad_sequence
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from collections import defaultdict
import random
from torch.utils.data import Dataset

In [3]:
train_dataframes = []
#for i in tqdm(range(1, 11)):
    #train_dataframes.append(pd.read_csv(f'/kaggle/input/product-re-purchase-prediction/data-train/data-train/train_data_part_{i}.csv'))
train_dataframes.append(pd.read_csv(f'/kaggle/input/product-re-purchase-prediction/data-train/data-train/train_data_part_1.csv'))
train_data = pd.concat(train_dataframes, ignore_index=True)

del train_dataframes

products_data = pd.read_csv('/kaggle/input/product-re-purchase-prediction/data-train/data-train/products_data.csv', low_memory=False)
test_data = pd.read_csv('/kaggle/input/product-re-purchase-prediction/data-train/data-train/test_data.csv')

In [4]:
train_data['date'] = pd.to_datetime(train_data['date'])

# Add recency attribute
latest_date = train_data['date'].max()  # Find the latest date in the dataset
train_data['recency'] = (latest_date - train_data['date']).dt.days  # Calculate days since last purchase

# Group by customer_id and product_id to calculate quantity and most recent purchase
customer_product_data = train_data.groupby(['customer_id', 'product_id']).agg({
    'quantity': 'sum',
    'recency': 'min'  # Minimum days since purchase (most recent)
}).reset_index()

# Normalize quantity and recency scores
customer_product_data['quantity_score'] = customer_product_data['quantity'] / customer_product_data['quantity'].max()
customer_product_data['recency_score'] = 1 - (customer_product_data['recency'] / customer_product_data['recency'].max())  # Recent = higher score

# Merge frequency data into customer_product_data
frequency = train_data.groupby(['customer_id', 'product_id'])['transaction_id'].count().reset_index(name='frequency')
customer_product_data = customer_product_data.merge(frequency, on=['customer_id', 'product_id'], how='left')

# Fill any missing frequency values (if any product has no transactions counted, assume 0)
customer_product_data['frequency'] = customer_product_data['frequency'].fillna(0)

# Normalize frequency score
customer_product_data['frequency_score'] = customer_product_data['frequency'] / customer_product_data['frequency'].max()

# Compute final score
# Define the set of popular items
popular_items = {'Product_23971', 'Product_28633', 'Product_39751', 'Product_20421', 'Product_63301', 'Product_57942'}

# Add a column to indicate if a product is popular
customer_product_data['is_popular'] = customer_product_data['product_id'].isin(popular_items).astype(int)

Best_alpha=0.03
Best_beta=0.87
Best_gamma=0.1
Best_leverage=0.0019395677472984205

# Recalculate the final score, adding leverage for popular items
customer_product_data['final_score'] = (
    Best_alpha * customer_product_data['quantity_score'] + 
    Best_beta * customer_product_data['frequency_score'] +
    Best_gamma * customer_product_data['recency_score'] +
    Best_leverage * customer_product_data['is_popular']  # Add leverage
)

# Merge final score back into train_data
train_data = train_data.merge(
    customer_product_data[['customer_id', 'product_id', 'final_score']],
    on=['customer_id', 'product_id'],
    how='left'
)

# Merge train_data with products_data
#data_train = pd.merge(train_data, products_data, on="product_id", how="left")
data_train = train_data.copy()

# Sort data_train by date
data_train = data_train.sort_values(by='date')
data_train.reset_index(drop=True, inplace=True)

# Drop unnecessary columns
data_train = data_train.drop(columns=['date'])

# Move final_score to the last column
final_score_column = data_train.pop('final_score')  # Remove `final_score` and store it
data_train['final_score'] = final_score_column     # Reinsert `final_score` at the end

print(data_train.head(1))
print("Finish Data Preparation!")

        transaction_id     customer_id     product_id  has_loyality_card  \
0  Transaction_2754179  Household_8373  Product_73736                  0   

    store_id  is_promo  quantity format order_channel  recency  final_score  
0  Store_552         0       1.0  DRIVE       WEBSITE      729     0.109268  
Finish Data Preparation!


In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Step 1: Split User-Item Sequences for Train and Validation
user_item_sequences = defaultdict(list)

# Construct sequences for each user
for _, row in data_train.iterrows():
    user_item_sequences[row['customer_id']].append(row['product_id'])

# Convert to a dictionary for easy access
user_item_sequences = dict(user_item_sequences)

# Step 2: Split users into train and validation sets
train_sequences = {}
val_sequences = {}

# Split users into train and validation sets (80% training, 20% validation)
all_users = list(user_item_sequences.keys())
random.shuffle(all_users)

val_split_ratio = 0.2
val_user_count = int(len(all_users) * val_split_ratio)
val_users = all_users[:val_user_count]
train_users = all_users[val_user_count:]

# Create separate sequences for train and validation
for user in train_users:
    train_sequences[user] = user_item_sequences[user][:-1]  # All except last item for training
for user in val_users:
    val_sequences[user] = user_item_sequences[user][-1:]  # The last item for validation

# Step 3: Encode Users and Items
user_encoder = {user: idx for idx, user in enumerate(user_item_sequences.keys())}
item_encoder = {item: idx for idx, item in enumerate(set(data_train['product_id']))}

# Step 4: Prepare Sequences for Train and Validation
def encode_sequences(sequences, max_seq_length):
    encoded_sequences = {
        user_encoder[user]: [item_encoder[item] for item in items]
        for user, items in sequences.items()
    }

    padded_sequences = []
    targets = []
    users = []

    for user, seq in encoded_sequences.items():
        if len(seq) >= max_seq_length:
            seq = seq[-max_seq_length:]  # Keep only the last `max_seq_length` items
        else:
            seq = [0] * (max_seq_length - len(seq)) + seq  # Pad with zeros
        
        padded_sequences.append(seq)
        targets.append(seq[-1])  # Predict the last item in the sequence
        users.append(user)

    return torch.tensor(padded_sequences, dtype=torch.long), torch.tensor(targets, dtype=torch.long), torch.tensor(users, dtype=torch.long)

max_seq_length = 10
train_padded_sequences, train_targets, train_users = encode_sequences(train_sequences, max_seq_length)
val_padded_sequences, val_targets, val_users = encode_sequences(val_sequences, max_seq_length)

# Step 5: Create DataLoaders for Train and Validation Sets
train_dataset = TensorDataset(train_users, train_padded_sequences, train_targets)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

val_dataset = TensorDataset(val_users, val_padded_sequences, val_targets)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Hyperparameters
num_items = len(item_encoder)
embedding_dim = 50
max_seq_length = train_padded_sequences.size(1)
num_h_filters = 16
num_v_filters = 8
horizontal_filter_sizes = [2, 3, 4]
num_negatives = 30
batch_size = 64
epochs = 10
learning_rate = 0.001
weight_decay = 1e-4

# Load product features from products_data
product_features = data_train.drop(columns=['transaction_id',	'customer_id',	'product_id', 'store_id', 'format',	'order_channel']).values  # Drop non-feature columns

# Ensure product IDs are encoded
product_encoder = {item: idx for idx, item in enumerate(products_data['product_id'].unique())}
encoded_product_ids = [product_encoder[pid] for pid in train_data['product_id']]

# Align features with product IDs
item_features_tensor = torch.zeros(len(product_encoder), product_features.shape[1])
for pid, features in zip(encoded_product_ids, product_features):
    item_features_tensor[pid] = torch.tensor(features)

feature_dim = item_features_tensor.size(1)  # Get feature dimension

# Ensure consistent user and item encoding
num_users = len(user_encoder)
num_items = len(item_encoder)
                
print("Finish Data Preparation!")

Finish Data Preparation!


In [6]:
# Candidate generation function
def get_candidates(user_seq, candidate_size=50):
    global_top_items = torch.arange(num_items)[:candidate_size]
    return global_top_items

class CaserDatasetWithNegatives(Dataset):
    def __init__(self, sequences, targets, num_items, num_negatives=1):
        self.sequences = sequences
        self.targets = targets
        self.num_items = num_items
        self.num_negatives = num_negatives

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        target = self.targets[idx]

        # Generate negative samples
        negatives = []
        while len(negatives) < self.num_negatives:
            neg = random.randint(0, self.num_items - 1)
            if neg != target:  # Ensure it's not the positive item
                negatives.append(neg)

        # Ensure everything is returned as a tensor
        return {
            "user_id": torch.tensor(idx, dtype=torch.long),
            "sequence": torch.tensor(sequence, dtype=torch.long),
            "target": torch.tensor(target, dtype=torch.long),
            "negatives": torch.tensor(negatives, dtype=torch.long),
        }

# Create DataLoader for the training set with negative samples
train_dataset = CaserDatasetWithNegatives(
    sequences=train_padded_sequences,
    targets=train_targets,
    num_items=num_items,
    num_negatives=num_negatives,
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Similarly, create DataLoader for the validation set
val_dataset = CaserDatasetWithNegatives(
    sequences=val_padded_sequences,
    targets=val_targets,
    num_items=num_items,
    num_negatives=num_negatives,
)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

print("Finish Data Preprocessing!")

Finish Data Preprocessing!


In [7]:
# Caser Model
class Caser(nn.Module):
    def __init__(self, num_items, embedding_dim, num_h_filters, num_v_filters):
        super(Caser, self).__init__()
        self.num_items = num_items
        self.embedding_dim = embedding_dim
        self.num_h_filters = num_h_filters
        self.num_v_filters = num_v_filters

        # Embedding layers for items
        self.item_embeddings = nn.Embedding(num_items, embedding_dim)

        # Horizontal and vertical convolutional layers
        self.h_conv = nn.Conv1d(in_channels=embedding_dim, out_channels=num_h_filters, kernel_size=3, padding=1)
        self.v_conv = nn.Conv1d(in_channels=embedding_dim, out_channels=num_v_filters, kernel_size=3, padding=1)

        # Fully connected layers
        self.fc1 = nn.Linear(num_h_filters + num_v_filters, 50)  # Adjust the input size based on combined output
        self.fc2 = nn.Linear(50, num_items)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, user_ids, item_seq, item_features):
        item_embeds = self.item_embeddings(item_seq)  # Shape: (batch_size, seq_len, embedding_dim)

        # Horizontal and vertical convolutions
        h_out = self.h_conv(item_embeds.transpose(1, 2))  # Shape: (batch_size, num_h_filters, seq_len)
        v_out = self.v_conv(item_embeds.transpose(1, 2))  # Shape: (batch_size, num_v_filters, seq_len)

        # Apply dropout to convolution outputs
        h_out = self.dropout(h_out)
        v_out = self.dropout(v_out)

        
        # Global average pooling for each output
        h_out = h_out.mean(dim=2)  # Shape: (batch_size, num_h_filters)
        v_out = v_out.mean(dim=2)  # Shape: (batch_size, num_v_filters)

        # Concatenate the outputs from the two convolutions
        combined_output = torch.cat([h_out, v_out], dim=1)  # Shape: (batch_size, num_h_filters + num_v_filters)

        # Fully connected layers
        x = torch.relu(self.fc1(combined_output))  # Shape: (batch_size, 50)
        x = self.dropout(x)
        scores = self.fc2(x)  # Shape: (batch_size, 1)

        return scores

# Instantiate Model
model = Caser(
    num_items=num_items,                  # Total number of items
    embedding_dim=embedding_dim,         # Dimensionality of embeddings
    num_h_filters=num_h_filters,         # Number of horizontal filters
    num_v_filters=num_v_filters,         # Number of vertical filters
).to(device)


# Loss and Optimizer
criterion = nn.BCEWithLogitsLoss()  # For binary classification between positive and negatives
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Step 6: Adjust the Model Training Loop
train_losses=[]
val_losses=[]
hitrate10_scores=[]

epochs = 30
for epoch in range(epochs):
    model.train()
    train_loss = 0

    # Training loop
    for batch in train_loader:
        # Unpack dictionary from the dataset
        user_ids = batch['sequence']
        sequences = batch['sequence']
        targets = batch['target']
        negatives = batch["negatives"].to(device)  # Shape: (batch_size, num_negatives)
    
        user_ids, sequences, targets = user_ids.to(device), sequences.to(device), targets.to(device)
    
        optimizer.zero_grad()
    
        # Forward pass for positive items
        positive_scores = model(user_ids, sequences, item_features_tensor.to(device))  # Shape: (batch_size, num_items)
        positive_scores = torch.gather(positive_scores, 1, targets.unsqueeze(1)).squeeze(1)  # Shape: (batch_size,)
        
        # Negative sampling
        negative_scores = model(user_ids, sequences, item_features_tensor.to(device))  # Shape: (batch_size, num_items)
        negative_scores = torch.gather(negative_scores, 1, negatives)  # Shape: (batch_size, num_negatives)
        
        # Reshape positive_scores for concatenation
        positive_scores = positive_scores.unsqueeze(1)  # Shape: (batch_size, 1)
        
        # Concatenate scores and labels
        all_scores = torch.cat([positive_scores, negative_scores], dim=1)  # Shape: (batch_size, 1 + num_negatives)
        pos_labels = torch.ones_like(positive_scores)  # Shape: (batch_size, 1)
        neg_labels = torch.zeros_like(negative_scores)  # Shape: (batch_size, num_negatives)
        all_labels = torch.cat([pos_labels, neg_labels], dim=1)  # Shape: (batch_size, 1 + num_negatives)
        
        # Flatten for loss calculation
        loss = criterion(all_scores.view(-1), all_labels.view(-1))

        # Backpropagation
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}")

    # Validation loop
    model.eval()
    val_loss = 0
    hitrate = 0
    
    with torch.no_grad():
        for batch in val_loader:
            user_ids = batch["user_id"].to(device)
            sequences = batch["sequence"].to(device)
            targets = batch["target"].to(device)
            negatives = batch["negatives"].to(device)  # Should be shape (batch_size, num_negatives)
        
            # Forward pass for positive items
            positive_scores = model(user_ids, sequences, item_features_tensor.to(device))  # Shape: (batch_size, num_items)
        
            # Reshape positive_scores for negative sampling
            positive_scores = torch.gather(positive_scores, 1, targets.unsqueeze(1))  # Shape: (batch_size,)
            
            # Negative sampling - same as positive scoring
            negative_scores = model(user_ids, sequences, item_features_tensor.to(device))  # Shape: (batch_size, num_items)
            negative_scores = torch.gather(negative_scores, 1, negatives)  # Shape: (batch_size, num_negatives)
            
            # Concatenate scores and labels
            all_scores = torch.cat([positive_scores, negative_scores], dim=1)  # Shape: (batch_size, 1 + num_negatives)
            
            pos_labels = torch.ones_like(positive_scores)  # Shape: (batch_size, 1)
            neg_labels = torch.zeros_like(negative_scores)  # Shape: (batch_size, num_negatives)
            all_labels = torch.cat([pos_labels, neg_labels], dim=1)  # Shape: (batch_size, 1 + num_negatives)
            
            # Flatten for BCEWithLogitsLoss
            loss = criterion(all_scores.view(-1), all_labels.view(-1))  # Flatten both tensors
        
            val_loss += loss.item()
        
            # Calculate HR@10 for the batch
            hitrate_batch = 0
            for i in range(targets.size(0)):
                if targets[i] in negatives[i]:
                    hitrate_batch += 1
            hitrate += hitrate_batch / targets.size(0)

    val_loss /= len(val_loader)
    val_losses.append(val_loss)
    
    hitrate10_scores.append(hitrate / len(val_loader))
    
    print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {val_loss:.4f}")

  "sequence": torch.tensor(sequence, dtype=torch.long),
  "target": torch.tensor(target, dtype=torch.long),


Epoch 1/30, Train Loss: 0.6834
Epoch 1/30, Validation Loss: 0.6433
Epoch 2/30, Train Loss: 0.5134
Epoch 2/30, Validation Loss: 0.2240
Epoch 3/30, Train Loss: 0.2203
Epoch 3/30, Validation Loss: 0.1331
Epoch 4/30, Train Loss: 0.1363
Epoch 4/30, Validation Loss: 0.1223
Epoch 5/30, Train Loss: 0.1122
Epoch 5/30, Validation Loss: 0.1220
Epoch 6/30, Train Loss: 0.1019
Epoch 6/30, Validation Loss: 0.1181
Epoch 7/30, Train Loss: 0.0957
Epoch 7/30, Validation Loss: 0.1204
Epoch 8/30, Train Loss: 0.0922
Epoch 8/30, Validation Loss: 0.1260
Epoch 9/30, Train Loss: 0.0889
Epoch 9/30, Validation Loss: 0.1277
Epoch 10/30, Train Loss: 0.0861
Epoch 10/30, Validation Loss: 0.1337
Epoch 11/30, Train Loss: 0.0842
Epoch 11/30, Validation Loss: 0.1309
Epoch 12/30, Train Loss: 0.0833
Epoch 12/30, Validation Loss: 0.1292
Epoch 13/30, Train Loss: 0.0819
Epoch 13/30, Validation Loss: 0.1384
Epoch 14/30, Train Loss: 0.0794
Epoch 14/30, Validation Loss: 0.1374
Epoch 15/30, Train Loss: 0.0787
Epoch 15/30, Validat

# Evaluation

In [7]:
user_item_history = train_data.groupby('customer_id')['product_id'].apply(list).to_dict()

def create_product_id_mapping(user_item_history):
    """
    Create a mapping from product_id (string) to an integer.
    """
    unique_product_ids = set([product_id for seq in user_item_history.values() for product_id in seq])
    product_id_mapping = {product_id: idx for idx, product_id in enumerate(unique_product_ids)}
    return product_id_mapping

def pad_sequences(user_item_history, num_items, product_id_mapping, device='cpu'):
    # Pad sequences to a minimum length and convert product_ids to integers
    padded_sequences = {}
    for user_id, seq in user_item_history.items():
        # Convert product IDs to integers using the mapping
        seq = [product_id_mapping.get(product_id, -1) for product_id in seq]  # -1 for unknown product_ids

        if len(seq) < num_items:  # Pad if sequence length is smaller
            pad_length = num_items - len(seq)
            seq = seq + [0] * pad_length  # Padding with 0
        padded_sequences[user_id] = torch.tensor(seq, dtype=torch.long).to(device)

    return padded_sequences

# Create product ID mapping
product_id_mapping = create_product_id_mapping(user_item_history)

# Apply padding to user-item history with the mapping
padded_user_item_history = pad_sequences(user_item_history, num_items, product_id_mapping, device='cpu')

def generate_top_10_recommendations(model, train_data, item_features, num_users, num_items, product_id_mapping, device='cpu'):
    """
    Generate top 10 recommendations for each user from the trained Caser model.

    Args:
        model: Trained Caser model.
        train_data: DataFrame containing historical transaction data.
        item_features: Tensor containing product features.
        num_users: Total number of users in the dataset.
        num_items: Total number of items in the dataset.
        product_id_mapping: Mapping from product_id (str) to integer index.
        device: Device to run the model on (e.g., 'cpu' or 'cuda').

    Returns:
        DataFrame with columns: 'customer_id', 'product_id', 'rank'.
    """
    model.eval()  # Set the model to evaluation mode

    # Create user-item history from transaction data
    user_item_history = train_data.groupby('customer_id')['product_id'].apply(list).to_dict()

    # Pad sequences for each user
    padded_user_item_history = pad_sequences(user_item_history, num_items, product_id_mapping, device=device)

    recommendations = []

    for user_id in range(num_users):
        # Get padded item sequence for the user
        item_seq = padded_user_item_history.get(user_id, torch.zeros(num_items, dtype=torch.long).to(device))

        # Prepare user IDs
        user_ids = torch.tensor([user_id] * num_items, dtype=torch.long).to(device)

        # Pass item features into the model
        features = item_features.unsqueeze(0).to(device)  # Add batch dimension
        scores = model(user_ids, item_seq.unsqueeze(0), features).detach().cpu().numpy().flatten()

        # Get top 10 recommendations
        top_10_item_indices = scores.argsort()[-10:][::-1]

        for rank, item_idx in enumerate(top_10_item_indices, start=1):
            recommendations.append({
                'customer_id': user_id,
                'product_id': item_idx,
                'rank': rank
            })

    return pd.DataFrame(recommendations)

product_features = (
    data_train.drop_duplicates(subset='product_id')  # Keep one row per product_id
    .set_index('product_id')  # Use product_id as the index
    .select_dtypes(include=['number'])  # Select only numeric columns for features
    .to_numpy()
)
item_features = torch.tensor(product_features, dtype=torch.float)

top_10_recommendations = generate_top_10_recommendations(model, train_data, item_features, num_users, num_items, product_id_mapping, device='cpu')

In [3]:
def hitrate_at_k(true_data: pd.DataFrame, predicted_data: pd.DataFrame, k: int = 10) -> float:
    """
    This function calculates the hitrate at k for the recommendations.
    It assesses how relevant our 10 product recommendations are.
    Args:
        true_data: True product purchase data
        predicted_data: Predicted product recommendations
        k: The number of top recommendations to consider
    Returns:
        The hitrate at k
    """
    # Ensure customer_id columns are strings before extracting numeric part (if needed)
    true_data['customer_id'] = true_data['customer_id'].astype(str)
    predicted_data['customer_id'] = predicted_data['customer_id'].astype(str)

    # Option 1: Extract numeric customer_id from strings if needed
    true_data['customer_id'] = true_data['customer_id'].str.extract('(\d+)', expand=False).astype(int)
    predicted_data['customer_id'] = predicted_data['customer_id'].str.extract('(\d+)', expand=False).astype(int)

    # Ensure product_id columns are strings before extracting numeric part
    true_data['product_id'] = true_data['product_id'].astype(str)
    predicted_data['product_id'] = predicted_data['product_id'].astype(str)

    # Extract numeric part from product_id and convert to integers
    true_data['product_id'] = true_data['product_id'].str.extract('(\d+)', expand=False).astype(int)
    predicted_data['product_id'] = predicted_data['product_id'].str.extract('(\d+)', expand=False).astype(int)

    # Merge the true and predicted data on customer_id and product_id
    data = pd.merge(left=true_data, right=predicted_data, how="left", on=["customer_id", "product_id"])

    # Filter recommendations where rank <= k
    df = data[data["rank"] <= k]

    # Calculate the number of successful recommendations per user
    non_null_counts = df.groupby('customer_id')['rank'].apply(lambda x: x.notna().sum()).reset_index(name='non_null_count')

    # Calculate the hitrate as the ratio of successful recommendations
    total_users = len(true_data['customer_id'].unique())
    hitrate = non_null_counts['non_null_count'].sum() / (total_users * k)

    return hitrate

# Calculate hitrate@10 with the updated function
hitrate_at_10 = hitrate_at_k(test_data, top_10_recommendations, k=10)
print(f"Hitrate@10 for the model is {hitrate_at_10:.2f}")

Hitrate@10 for the model is 10.02


In [None]:
user_item_history = train_data.groupby('customer_id')['product_id'].apply(list).to_dict()

def create_product_id_mapping(user_item_history):
    """
    Create a mapping from product_id (string) to an integer.
    """
    unique_product_ids = set([product_id for seq in user_item_history.values() for product_id in seq])
    product_id_mapping = {product_id: idx for idx, product_id in enumerate(unique_product_ids)}
    return product_id_mapping

def pad_sequences(user_item_history, num_items, product_id_mapping, device='cpu'):
    # Pad sequences to a minimum length and convert product_ids to integers
    padded_sequences = {}
    for user_id, seq in user_item_history.items():
        # Convert product IDs to integers using the mapping
        seq = [product_id_mapping.get(product_id, -1) for product_id in seq]  # -1 for unknown product_ids

        if len(seq) < num_items:  # Pad if sequence length is smaller
            pad_length = num_items - len(seq)
            seq = seq + [0] * pad_length  # Padding with 0
        padded_sequences[user_id] = torch.tensor(seq, dtype=torch.long).to(device)

    return padded_sequences

# Create product ID mapping
product_id_mapping = create_product_id_mapping(user_item_history)

# Apply padding to user-item history with the mapping
padded_user_item_history = pad_sequences(user_item_history, num_items, product_id_mapping, device='cpu')

def generate_top_10_recommendations(model, train_data, item_features, num_users, num_items, product_id_mapping, device='cpu'):
    """
    Generate top 10 recommendations for each user from the trained Caser model.

    Args:
        model: Trained Caser model.
        train_data: DataFrame containing historical transaction data.
        item_features: Tensor containing product features.
        num_users: Total number of users in the dataset.
        num_items: Total number of items in the dataset.
        product_id_mapping: Mapping from product_id (str) to integer index.
        device: Device to run the model on (e.g., 'cpu' or 'cuda').

    Returns:
        DataFrame with columns: 'customer_id', 'product_id', 'rank'.
    """
    model.eval()  # Set the model to evaluation mode

    # Create user-item history from transaction data
    user_item_history = train_data.groupby('customer_id')['product_id'].apply(list).to_dict()

    # Pad sequences for each user
    padded_user_item_history = pad_sequences(user_item_history, num_items, product_id_mapping, device=device)

    recommendations = []

    for user_id in range(num_users):
        # Get padded item sequence for the user
        item_seq = padded_user_item_history.get(user_id, torch.zeros(num_items, dtype=torch.long).to(device))

        # Prepare user IDs
        user_ids = torch.tensor([user_id] * num_items, dtype=torch.long).to(device)

        # Pass item features into the model
        features = item_features.unsqueeze(0).to(device)  # Add batch dimension
        scores = model(user_ids, item_seq.unsqueeze(0), features).detach().cpu().numpy().flatten()
        
        for item_idx, score in enumerate(scores):
            recommendations.append({
                'customer_id': user_id,
                'product_id': item_idx,
                'score': scores
            })

    return pd.DataFrame(recommendations)

product_features = (
    data_train.drop_duplicates(subset='product_id')  # Keep one row per product_id
    .set_index('product_id')  # Use product_id as the index
    .select_dtypes(include=['number'])  # Select only numeric columns for features
    .to_numpy()
)
item_features = torch.tensor(product_features, dtype=torch.float)

top_10_recommendations = generate_top_10_recommendations(model, train_data, item_features, num_users, num_items, product_id_mapping, device='cpu')