# Task 2: Lookalike Model

In [22]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from datetime import datetime
import pandas as pd

In [23]:
# Read the three raw tables (customers, products, transactions) from Drive,
# in that order, and bind each one to its own DataFrame.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/Zeotap/Customers.csv',
        '/content/drive/MyDrive/Zeotap/Products.csv',
        '/content/drive/MyDrive/Zeotap/Transactions.csv',
    )
)

Convert the transaction date column to datetime and merge the three datasets

In [24]:
# Parse the transaction timestamps so date arithmetic works on them later.
parsed_dates = pd.to_datetime(transactions['TransactionDate'])
transactions['TransactionDate'] = parsed_dates

# Attach customer attributes, then product attributes, to every transaction.
# Both joins are inner joins, so a transaction whose CustomerID or ProductID
# has no match in the lookup table is dropped.
data = pd.merge(
    pd.merge(transactions, customers, how='inner', on='CustomerID'),
    products,
    how='inner',
    on='ProductID',
)

Feature engineering

In [25]:
# Aggregate one row of behavioural features per customer.
#
# Recency is measured against the most recent transaction in the dataset rather
# than the wall clock (datetime.now()), so the feature values are identical on
# every re-run of the notebook instead of drifting with the execution date.
reference_date = data['TransactionDate'].max()

customer_features = data.groupby('CustomerID').agg({
    'TotalValue': 'sum',                           # Total spending by customer
    'Quantity': 'sum',                             # Total quantity purchased by customer
    'ProductID': 'nunique',                        # Number of unique products purchased
    # Whole days between the customer's last transaction and the dataset's latest one
    'TransactionDate': lambda x: (reference_date - x.max()).days,
}).reset_index()

In [26]:
# Replace the aggregated source-column names with descriptive feature names.
# The assignment is positional: the key column first, then the four features
# in the order they were aggregated.
customer_features.columns = (
    ['CustomerID']
    + ['TotalSpending', 'TotalQuantity', 'UniqueProducts', 'Recency']
)

Standardize features for similarity calculation

In [27]:
# Standardize every feature column (everything after the leading CustomerID
# column) so that no single feature dominates the similarity computation.
feature_matrix = customer_features.iloc[:, 1:]
scaler = StandardScaler()
scaler.fit(feature_matrix)
scaled_features = scaler.transform(feature_matrix)

Compute cosine similarity between customers

In [28]:
# Pairwise cosine similarity between all customers: entry [i, j] compares
# row i and row j of scaled_features.
similarity_matrix = cosine_similarity(X=scaled_features)

Recommend top 3 similar customers for the first 20 customers

In [31]:
# Build the top-3 lookalike list for each of the first 20 customers.
# Row i of similarity_matrix holds customer i's similarity to every customer,
# in the same row order as customer_features, so positions map straight to IDs.
customer_ids = customer_features['CustomerID'].tolist()

lookalike_results = {}
for i, customer_id in enumerate(customer_ids[:20]):
    # Exclude the customer's own entry by index rather than assuming it sorts
    # first: another customer with an identical feature vector (or a
    # floating-point tie) can outrank it, which would otherwise list the
    # customer as its own lookalike and drop a genuine match.
    sim_scores = [(j, score) for j, score in enumerate(similarity_matrix[i]) if j != i]
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[:3]  # Top 3 most similar
    # float(...) turns the NumPy scalar into a plain Python float so the
    # stringified list below reads "0.9731" instead of "np.float64(0.9731)".
    lookalike_results[customer_id] = [(customer_ids[j], round(float(score), 4)) for j, score in sim_scores]

# One output row per customer; the lookalike list is stored as its string form.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_results.keys()),
    'Lookalikes': [str(v) for v in lookalike_results.values()]
})
# NOTE(review): this writes under "Zeotap challenge/" while the inputs are read
# from "Zeotap/" — confirm the output folder is intentional.
lookalike_df.to_csv("/content/drive/MyDrive/Zeotap challenge/Yashas_P_Lookalike.csv", index=False)
print("Saved")


Saved
