# In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

# Read the three input tables from CSV files in the working directory.
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')
products = pd.read_csv('Products.csv')

# Left join on CustomerID: every row of `customers` is kept, and customers
# without a matching transaction get NaN in the transaction columns.
customer_transactions = customers.merge(transactions, on='CustomerID', how='left')

# One row per CustomerID holding the summed TotalValue, the number of distinct
# ProductID values, and the count of non-null TransactionDate entries.
customer_features = (
    customer_transactions
    .groupby('CustomerID')
    .agg({'TotalValue': 'sum', 'ProductID': 'nunique', 'TransactionDate': 'count'})
    .reset_index()
)


# Standardize the three aggregate columns before comparing customers, so the
# similarity is not dominated by whichever column has the largest raw values.
scaler = StandardScaler()
customer_features_scaled = scaler.fit_transform(
    customer_features.loc[:, ['TotalValue', 'ProductID', 'TransactionDate']]
)

# Square matrix of pairwise cosine similarities; row/column r corresponds to
# row r of customer_features.
similarity_matrix = cosine_similarity(customer_features_scaled)

# How many customers (taken in Customers.csv row order) receive
# recommendations, and how many lookalikes each of them gets.
NUM_TARGET_CUSTOMERS = 20
TOP_K_LOOKALIKES = 3

# Row r of similarity_matrix describes the customer in row r of
# customer_features. groupby() orders that frame by CustomerID and keeps one
# row per distinct ID, which need not match the row order of `customers`, so
# IDs are resolved through customer_features rather than by position in
# `customers`.
feature_ids = customer_features['CustomerID'].to_numpy()
row_by_customer = {cust_id: row for row, cust_id in enumerate(feature_ids)}

# head() simply returns fewer rows when the file holds fewer customers than
# requested, so a small input no longer raises.
target_ids = (
    customers['CustomerID']
    .dropna()
    .drop_duplicates()
    .head(NUM_TARGET_CUSTOMERS)
)

# Maps each target CustomerID to a list of (lookalike CustomerID, score)
# tuples, ordered from most to least similar.
lookalike_map = {}
for cust_id in target_ids:
    row = row_by_customer[cust_id]
    similarity_scores = similarity_matrix[row]
    ranked = np.argsort(similarity_scores)[::-1]  # most similar first
    # Remove the customer's own row explicitly: with tied scores (or an
    # all-zero scaled feature vector) it is not guaranteed to rank first.
    similar_customers = ranked[ranked != row][:TOP_K_LOOKALIKES]
    lookalike_map[cust_id] = [
        (feature_ids[j], similarity_scores[j]) for j in similar_customers
    ]

# Flatten the mapping into one (customer, lookalike, score) row per
# recommendation.
lookalike_df = pd.DataFrame(
    [
        (target_id, match_id, match_score)
        for target_id, matches in lookalike_map.items()
        for match_id, match_score in matches
    ],
    columns=['CustomerID', 'LookalikeID', 'SimilarityScore'],
)

# Write the recommendations to disk without the DataFrame index column.
lookalike_df.to_csv('Lookalike.csv', index=False)
