In [1]:
#  Import Libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler




In [2]:
# Load Data
customers = pd.read_csv('/content/Customers.csv')
products = pd.read_csv('/content/Products.csv')
transactions = pd.read_csv('/content/Transactions.csv')

#  Merge Data
data = pd.merge(transactions, customers, on='CustomerID', how='left')
data = pd.merge(data, products, on='ProductID', how='left')



In [8]:
#  Feature Engineering - Customer-Level Features
customer_features = data.groupby('CustomerID').agg({
    'Price_x': 'sum',  # Changed 'UnitPrice' to 'price' - Assuming the actual column name is 'price'
    'Quantity': 'sum',
    'TransactionID': 'count',
    'Category': lambda x: ' '.join(x.unique())  # Concatenate categories purchased
}).rename(columns={
    'Price_x': 'TotalRevenue', # Rename 'price' to 'TotalRevenue' - Changed from 'UnitPrice'
    'Quantity': 'TotalQuantity',
    'TransactionID': 'TransactionCount'
}).reset_index()

In [9]:
# Encode Categorical Features
customer_features = pd.get_dummies(customer_features, columns=['Category'], prefix='Category')

# Standardize Features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features.iloc[:, 1:])
scaled_customer_features = pd.DataFrame(scaled_features, columns=customer_features.columns[1:], index=customer_features['CustomerID'])



In [10]:
#  Calculate Cosine Similarity
similarity_matrix = cosine_similarity(scaled_customer_features)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_features['CustomerID'], columns=customer_features['CustomerID'])



In [11]:
# Recommend Top 3 Lookalikes
def get_top_lookalikes(customer_id, similarity_matrix, top_n=3):
    scores = similarity_matrix.loc[customer_id].sort_values(ascending=False)[1:top_n+1]  # Exclude self
    return list(zip(scores.index, scores.values))



In [12]:
#  Generate Lookalike Map for C0001-C0020
lookalike_map = {}
for customer_id in customer_features['CustomerID'][:20]:  # First 20 customers
    lookalike_map[customer_id] = get_top_lookalikes(customer_id, similarity_df)



In [13]:
#  Save Lookalike Map to CSV
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_map.keys()),
    'Lookalikes': [str(v) for v in lookalike_map.values()]
})
lookalike_df.to_csv('Lookalike.csv', index=False)
print('Lookalike Map saved to Lookalike.csv')



Lookalike Map saved to Lookalike.csv


In [14]:
#  Display Sample Lookalike Map
lookalike_df.head()

Unnamed: 0,CustomerID,Lookalikes
0,C0001,"[('C0064', 0.9992693917143656), ('C0018', 0.98..."
1,C0002,"[('C0159', 0.9960634230039127), ('C0133', 0.99..."
2,C0003,"[('C0031', 0.994108427749617), ('C0026', 0.992..."
3,C0004,"[('C0065', 0.9707173157606297), ('C0018', 0.94..."
4,C0005,"[('C0197', 0.9986021917927446), ('C0163', 0.94..."
