In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [4]:
# Load datasets
# DATA_DIR is the single place to edit when the CSV files live somewhere else;
# the three paths built from it are the same ones that were previously hard-coded.
DATA_DIR = "E:/Project of DS/Zeotap"

customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

In [6]:
# Merge transactions with customer data: attach each customer's Region to their transactions.
# A Region column left over from an earlier run of this cell is dropped first, so
# re-running the cell does not produce Region_x / Region_y duplicates.
# validate='many_to_one' makes pandas raise a MergeError if CustomerID is not unique in
# `customers`, instead of silently duplicating transaction rows.
transactions = (
    transactions
    .drop(columns='Region', errors='ignore')
    .merge(
        customers[['CustomerID', 'Region']],
        on='CustomerID',
        how='left',
        validate='many_to_one',
    )
)

# 1. Feature Engineering: Create user-profile vectors

In [8]:
# Per-customer purchase profile built from the transaction log.
# Output columns keep the source column names:
#   TotalValue -> summed spend
#   ProductID  -> number of distinct products bought
#   Quantity   -> summed units bought
profile_aggregations = {
    'TotalValue': 'sum',
    'ProductID': 'nunique',
    'Quantity': 'sum',
}
transactions_by_customer = transactions.groupby('CustomerID')
# reset_index() turns the CustomerID group key back into an ordinary column.
user_features = transactions_by_customer.agg(profile_aggregations).reset_index()

In [11]:
# Add customer region as one-hot indicator columns (one column per distinct Region value).
# dtype=float keeps the indicators numeric whatever the pandas default is
# (bool in pandas >= 2.0, uint8 before that).
region_dummies = pd.get_dummies(customers['Region'], dtype=float)
region_columns = list(region_dummies.columns)
region_dummies['CustomerID'] = customers['CustomerID']

# Drop indicator columns left by an earlier run of this cell before merging, so a
# re-run does not append suffixed duplicates (<region>_x / <region>_y) that would
# then be scaled and counted a second time in the similarity computation.
user_features = (
    user_features
    .drop(columns=region_columns, errors='ignore')
    .merge(region_dummies, on='CustomerID', how='left')
)

In [13]:
# 2. Standardize the feature columns.
# CustomerID is an identifier, not a feature, so it is removed before scaling.
feature_columns = user_features.drop(columns=['CustomerID'])
scaler = StandardScaler()
user_features_scaled = scaler.fit_transform(feature_columns)

In [15]:
# 3. Similarity calculation: pairwise cosine similarity between the rows of the
# scaled feature matrix, so cosine_sim[i][j] compares row i with row j.
cosine_sim = cosine_similarity(X=user_features_scaled)

In [17]:
# 4. Recommendations: empty mapping that will hold the top 3 lookalikes found for each customer.
lookalike_recommendations = dict()

In [19]:
# Number of lookalikes kept per customer.
TOP_N = 3
# Positional array of customer IDs: row position i here matches row/column i of cosine_sim.
customer_ids = user_features['CustomerID'].to_numpy()

for i, customer_id in enumerate(customer_ids):
    # Pair every other row position with its similarity to customer i.
    # Self is excluded by position rather than by assuming it sorts first: a customer
    # whose features tie with customer i could otherwise push self into the result
    # and push a genuine lookalike out.
    candidate_scores = [(j, score) for j, score in enumerate(cosine_sim[i]) if j != i]
    # Highest similarity first; sorted() is stable, so ties keep row order.
    top_matches = sorted(candidate_scores, key=lambda pair: pair[1], reverse=True)[:TOP_N]
    # Store the top lookalikes as (CustomerID, similarity score) pairs.
    lookalike_recommendations[customer_id] = [(customer_ids[j], score) for j, score in top_matches]


In [21]:
# 5. Output: flatten the recommendations into one row per (customer, lookalike)
# pair and write them to a CSV file.
lookalike_rows = [
    [cust_id, lookalike_id, similarity]
    for cust_id, recommendations in lookalike_recommendations.items()
    for lookalike_id, similarity in recommendations
]

lookalike_df = pd.DataFrame(lookalike_rows, columns=["CustomerID", "LookalikeID", "SimilarityScore"])
# index=False keeps the DataFrame's row index out of the file.
lookalike_df.to_csv("FirstName_LastName_Lookalike.csv", index=False)

In [23]:
# Display the first few recommendations.
# A bare last expression lets the notebook render the frame with its rich display
# instead of the plain-text form produced by print().
lookalike_df.head()

  CustomerID LookalikeID  SimilarityScore
0      C0001       C0107         0.996725
1      C0001       C0137         0.996063
2      C0001       C0191         0.990618
3      C0002       C0142         0.994534
4      C0002       C0177         0.986784
