# Task 2: Lookalike Model

In [46]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [47]:
# Read the three input tables from the working directory, in the same order
# as before: customers, then products, then transactions.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [48]:
# Attach customer attributes to every transaction, then attach product
# attributes. Both joins use pandas' default (inner) join, so rows whose
# CustomerID or ProductID has no match are dropped.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
customer_transactions = pd.merge(transactions_with_customers, products, on='ProductID')

In [49]:
# Collapse the transaction-level table to one row per customer:
#   TotalValue - summed over the customer's transactions
#   Quantity   - summed over the customer's transactions
#   ProductID  - the distinct product ids, joined into one ', '-separated string
#   Region     - taken from the customer's first row
customer_features = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        ProductID=('ProductID', lambda product_ids: ', '.join(product_ids.unique())),
        Region=('Region', 'first'),
    )
    .reset_index()
)

In [50]:
# One-hot encode Region into Region_<value> indicator columns.
# This cell overwrites `customer_features` with its own output, so the guard
# keeps it idempotent: once Region has been encoded the column no longer
# exists, and re-running an unguarded get_dummies would raise KeyError.
if 'Region' in customer_features.columns:
    customer_features = pd.get_dummies(customer_features, columns=['Region'])

In [51]:
# Numeric feature matrix: everything except the identifier and the joined
# ProductID string. Cast to float so the one-hot Region columns (which may be
# boolean, depending on the pandas version) are handled as plain numbers.
feature_matrix = customer_features.drop(columns=['CustomerID', 'ProductID']).astype(float)

# Standardise every column to zero mean / unit variance before comparing.
# Cosine similarity is scale-sensitive: on the raw values the summed
# TotalValue/Quantity columns (presumably orders of magnitude larger than 1)
# swamp the 0/1 Region indicators, so Region would barely affect the score.
# A constant column has zero spread; dividing by 1 instead avoids 0/0 -> NaN.
feature_scale = feature_matrix.std(ddof=0).replace(0, 1.0)
feature_matrix = (feature_matrix - feature_matrix.mean()) / feature_scale

# Pairwise customer-by-customer cosine similarity on the scaled features.
similarity_matrix = cosine_similarity(feature_matrix)

In [52]:
# Label both axes of the similarity matrix with CustomerID so scores can be
# looked up by customer id rather than by position.
customer_ids = customer_features['CustomerID']
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

In [53]:
def get_top_lookalikes(customer_id, n=3, similarity=None):
    """Return the `n` customers most similar to `customer_id`.

    Parameters
    ----------
    customer_id :
        Label of the customer to find lookalikes for. Must be a column label
        of the similarity table.
    n : int, default 3
        Maximum number of lookalikes to return.
    similarity : pandas.DataFrame, optional
        Square similarity table whose index and columns are customer ids.
        Defaults to the notebook-level `similarity_df`, so existing
        two-argument calls behave as before.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by lookalike customer id, highest first,
        with `customer_id` itself excluded.

    Raises
    ------
    KeyError
        If `customer_id` is not a column of the similarity table.
    """
    if similarity is None:
        # Fall back to the matrix built earlier in the notebook.
        similarity = similarity_df

    if customer_id not in similarity.columns:
        raise KeyError(f"Unknown customer_id: {customer_id!r}")

    scores = similarity[customer_id]
    # A customer is always its own best match; remove it before ranking.
    others = scores[scores.index != customer_id]
    # Stable sort so equal scores come back in a reproducible order.
    return others.sort_values(ascending=False, kind='stable').head(n)

In [54]:
# Collect (customer, lookalike, score) rows for the first 20 customers in
# customer_features, using get_top_lookalikes with its default n.
first_customer_ids = customer_features['CustomerID'].head(20)
lookalike_results = [
    (cust_id, lookalike_id, score)
    for cust_id in first_customer_ids
    for lookalike_id, score in get_top_lookalikes(cust_id).items()
]

In [55]:
# Turn the collected tuples into a tidy table, one row per
# (customer, lookalike) pair.
output_columns = ['CustomerID', 'LookalikeID', 'SimilarityScore']
lookalike_df = pd.DataFrame(data=lookalike_results, columns=output_columns)

In [56]:
# Write the lookalike table to CSV, leaving out the DataFrame's row index.
lookalike_df.to_csv(
    path_or_buf='Yashi_Gupta_Lookalike.csv',
    index=False,
)

In [57]:
# Report completion and name the file written by the previous cell.
completion_message = "Lookalike model created successfully. Results saved to Yashi_Gupta_Lookalike.csv."
print(completion_message)

Lookalike model created successfully. Results saved to Yashi_Gupta_Lookalike.csv.
