In [4]:
import pandas as pd
from scipy.io import arff
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load user activity data (interaction data)
data, meta = arff.loadarff('cleaned__user_activity1.arff')
user_activity_df = pd.DataFrame(data)

# Load orders, order items, and products data
orders_df = pd.read_csv('orders.csv')
order_items_df = pd.read_csv('order_items.csv')
products_df = pd.read_csv('products.csv')

# Rename columns to ensure consistency
products_df.rename(columns={'id': 'product_id'}, inplace=True)
orders_df.rename(columns={'id': 'order_id'}, inplace=True)

# Define interaction values
interaction_values = {
    b'browse': 2,
    b'view': 3,
    b'search': 3,
    b'add_to_cart': 4,
    b'remove_from_cart': 0,
    b'add_to_favorite': 5,
    b'remove_from_favorite': 0,
    b'add_to_preferred_categories': 3,
    b'completed_checkout': 6
}

# Map activity_type to interaction values
user_activity_df['interaction_value'] = user_activity_df['activity_type'].map(interaction_values)

# Merge orders and order items, add interaction values
orders_with_items = pd.merge(order_items_df, orders_df, on='order_id')
orders_with_items['interaction_value'] = 5  # Assign high value for purchases

# Concatenate interactions
all_interactions = pd.concat([
    user_activity_df[['user_id', 'product_id', 'interaction_value']].dropna(),
    orders_with_items[['user_id', 'product_id', 'interaction_value']]
])

# Define a Reader for the surprise library and load the dataset
reader = Reader(rating_scale=(1, 6))
data = Dataset.load_from_df(all_interactions[['user_id', 'product_id', 'interaction_value']], reader)

# Split the dataset into training and testing sets
trainset, testset = train_test_split(data, test_size=0.3, random_state=42)

# Initialize and train the SVD model
svd_model = SVD()
svd_model.fit(trainset)

# Evaluate the model using RMSE
predictions = svd_model.test(testset)
rmse = accuracy.rmse(predictions)
print(f"RMSE: {rmse}")

# Function to recommend top N products for a specific user
def recommend_products(user_id, model, product_df, interactions_df, n_recommendations=5):
    all_product_ids = product_df['product_id'].unique()
    user_interactions = interactions_df[interactions_df['user_id'] == user_id]['product_id'].unique()
    products_to_predict = [prod for prod in all_product_ids if prod not in user_interactions]
    
    predictions = [(prod, model.predict(user_id, prod).est) for prod in products_to_predict]
    top_recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)[:n_recommendations]
    
    recommended_products = pd.DataFrame(top_recommendations, columns=['product_id', 'predicted_score'])
    recommended_products = pd.merge(recommended_products, product_df[['product_id', 'name']], on='product_id')
    
    return recommended_products

# Generate recommendations for a specific user
user_id = 9
recommended_products = recommend_products(user_id, svd_model, products_df, all_interactions)

# Display the recommended products
print(f"Top product recommendations for user {user_id}:")
print(recommended_products)


RMSE: 1.2184
RMSE: 1.2183529827083912
Top product recommendations for user 9:
   product_id  predicted_score                                         name
0          34         4.094561                                 Apple Tablet
1          26         4.093105                                  Soccer Ball
2          17         4.049772                                         Sofa
3          19         4.040244                                 Office Chair
4           5         4.036759  Nuakene Face & Neck Skin Care Beauty Meter,
