In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the engagement records from a CSV file (path is relative to the working directory)
engagements_df = pd.read_csv("Engagements.csv")

In [3]:
# Preview the first rows to check which columns were loaded
engagements_df.head()

Unnamed: 0,user_id,post_id,engagement
0,U1,P52,1
1,U1,P44,0
2,U1,P1,1
3,U1,P4,1
4,U1,P65,0


In [4]:
# Count missing values in each column
engagements_df.isnull().sum()

user_id       0
post_id       0
engagement    0
dtype: int64

In [None]:
# Reshape the long engagement records into a wide matrix with one row per
# user_id and one column per post_id. User/post pairs that have no record
# come out of the pivot as NaN and are replaced with 0.
# Note: DataFrame.pivot raises ValueError when a (user_id, post_id) pair
# appears more than once in the input.
user_post_matrix = (
    engagements_df
    .pivot(index="user_id", columns="post_id", values="engagement")
    .fillna(0)
)

In [None]:
# Row and column labels of the matrix as plain lists, so positional indices
# can be translated back to user_id / post_id values later.
# DataFrame.axes is [row index, column index], in that order.
user_ids_cf, post_ids_cf = (axis.tolist() for axis in user_post_matrix.axes)

In [7]:
# 3. Apply SVD for Collaborative Filtering
# ------------------------------
# Number of latent factors kept by the decomposition
n_factors = 10
# Fixed seed so the factorisation is reproducible between runs
svd = TruncatedSVD(random_state=42, n_components=n_factors)

In [8]:
# Fit SVD on the user-post matrix.
# fit_transform fits `svd` in place and returns the input projected onto the
# n_factors components, i.e. one row of latent features per user.
user_latent = svd.fit_transform(user_post_matrix)  # user latent features
# components_ holds one row per component; transposing gives one row per post.
post_latent = svd.components_.T                    # post latent features


In [9]:
# Multiply the user factors by the post factors to rebuild a dense
# users x posts matrix of predicted engagement scores
predicted_scores = user_latent @ post_latent.T

In [10]:
# 4. Get Top-3 Recommendations per User
# ------------------------------
top_k = 3

# For every user, walk that user's row of predicted scores, order the column
# positions from highest to lowest score, keep the first top_k of them and
# translate those positions back into post ids.
# NOTE(review): every post is ranked, including ones the user already has an
# engagement record for — confirm that is the intended behaviour.
cf_recommendations = {
    user: [post_ids_cf[col] for col in user_scores.argsort()[::-1][:top_k]]
    for user, user_scores in zip(user_ids_cf, predicted_scores)
}

# ------------------------------
# 5. Output Collaborative Filtering Recommendations
# ------------------------------
# One record per user; the top_posts cell holds that user's whole list of post ids.
recommendation_records = [
    dict(user_id=uid, top_posts=posts)
    for uid, posts in cf_recommendations.items()
]
cf_recommendations_df = pd.DataFrame(recommendation_records)

In [12]:
# Example: look up the recommendations for user_id "U18"
test_user = "U18"

# Handle the unknown-user case first, then print the stored recommendation list
if test_user not in cf_recommendations:
    print(f"User {test_user} not found in the dataset.")
else:
    print(f"Top {top_k} CF recommended posts for {test_user}: {cf_recommendations[test_user]}")


Top 3 CF recommended posts for U18: ['P96', 'P50', 'P91']


In [13]:
# Write the per-user recommendations to CSV without the DataFrame index.
# NOTE(review): top_posts holds Python lists, which end up in the CSV as their
# text form — confirm whatever reads this file parses them back.
cf_recommendations_df.to_csv("collaborative_filtering_recommendations.csv", index=False)

In [14]:
import numpy as np

# Persist the predicted score matrix together with its row (user) and column
# (post) labels, so positions in the matrix can be mapped back to ids after
# the files are loaded elsewhere.
np.save(file="cf_score_matrix.npy", arr=predicted_scores)
np.save(file="user_ids_cf.npy", arr=np.array(user_ids_cf))
np.save(file="post_ids_cf.npy", arr=np.array(post_ids_cf))
