# Import necessary libraries

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets

In [None]:
# Read the three input tables from CSV files in the current working directory.
customers, transactions, products = [
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Transactions.csv', 'Products.csv')
]


# Merge datasets for analysis

In [None]:
# Attach customer and product attributes to every transaction row.
# pd.merge defaults to an inner join, so transactions without a matching
# customer or product are dropped here.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

# Feature engineering: collapse the transactions into one row per customer.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalSpending=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
        AvgTransactionValue=pd.NamedAgg(column='TotalValue', aggfunc='mean'),
        NumTransactions=pd.NamedAgg(column='TransactionID', aggfunc='count'),
        # The first Region value seen for the customer is used.
        Region=pd.NamedAgg(column='Region', aggfunc='first'),
    )
    .reset_index()
)

# Normalize numerical features

In [None]:
# Rescale the numeric features with MinMaxScaler (default range [0, 1]) so
# they share a common scale before similarity is computed.
numerical_features = ['TotalSpending', 'AvgTransactionValue', 'NumTransactions']
scaler = MinMaxScaler().fit(customer_features[numerical_features])
customer_features[numerical_features] = scaler.transform(
    customer_features[numerical_features]
)

# Encode categorical features (Region)

In [None]:
# One-hot encode Region, keeping an indicator column for every region.
# With drop_first=True the first region is encoded as all zeros, so two
# customers sharing that region gain nothing in cosine similarity while two
# customers sharing any other region do; it can also leave a customer with an
# all-zero feature vector. dtype=float keeps the feature table uniformly
# numeric instead of mixing bool/uint8 indicators with float features.
customer_features = pd.get_dummies(
    customer_features, columns=['Region'], drop_first=False, dtype=float
)


# Compute Cosine Similarity

In [None]:
# Pairwise cosine similarity between customers, using every column except the
# identifier. Row/column i of the result corresponds to row i of
# customer_features.
similarity_matrix = cosine_similarity(
    customer_features.drop(columns=['CustomerID'])
)

# Find top 3 similar customers for each of the first 20 customers (C0001–C0020)

In [None]:
# Map each of the first 20 customers (in customer_features row order) to a
# list of (lookalike CustomerID, similarity rounded to 2 decimals) for its
# 3 most similar *other* customers.
lookalike_results = {}
customer_ids = customer_features['CustomerID'].tolist()

for idx, customer_id in enumerate(customer_ids[:20]):
    # Exclude the customer's own entry by its position in the matrix rather
    # than assuming it sorts first. Another customer with an identical (or
    # proportional) feature vector also scores 1.0, and an all-zero feature
    # vector scores 0.0 even against itself, so slicing off the top-ranked
    # entry could drop a genuine match and report the customer as its own
    # lookalike.
    similarity_scores = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]

    # sorted() is stable, so customers with equal scores keep row order.
    sorted_scores = sorted(
        similarity_scores, key=lambda pair: pair[1], reverse=True
    )[:3]

    lookalike_results[customer_id] = [
        (customer_ids[other_idx], round(score, 2))
        for other_idx, score in sorted_scores
    ]

# Save results to FirstName_LastName_Lookalike.csv

In [None]:
def _lookalike_row(cust_id, lookalikes):
    """Flatten one customer's three (CustomerID, score) matches into a row."""
    first, second, third = lookalikes[0], lookalikes[1], lookalikes[2]
    return {
        'CustomerID': cust_id,
        'Lookalike1': first[0], 'Score1': first[1],
        'Lookalike2': second[0], 'Score2': second[1],
        'Lookalike3': third[0], 'Score3': third[1],
    }


# One output row per customer in lookalike_results, in insertion order.
lookalike_df = pd.DataFrame(
    [
        _lookalike_row(cust_id, lookalikes)
        for cust_id, lookalikes in lookalike_results.items()
    ]
)

# Write the table without the DataFrame index column.
lookalike_df.to_csv('FirstName_LastName_Lookalike.csv', index=False)