 # Data Preparation

In [1]:
# Import libraries
import csv
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the datasets
# All three input files live in the same folder; DATA_DIR is the single place to
# change when running the notebook on another machine.
DATA_DIR = Path(r"C:\Users\utkar\Desktop\Zeotap")

customers = pd.read_csv(DATA_DIR / "Customers (1).csv")
products = pd.read_csv(DATA_DIR / "Products.csv")
transactions = pd.read_csv(DATA_DIR / "Transactions.csv")

In [3]:
# Enrich every transaction with the buyer's region and the product's category/price.
# No `how` is passed, so both joins use pandas' default (inner) join.
customer_regions = customers[['CustomerID', 'Region']]
product_details = products[['ProductID', 'Category', 'Price']]
transactions = (
    transactions
    .merge(customer_regions, on='CustomerID')
    .merge(product_details, on='ProductID')
)

# Feature Engineering

In [4]:
# Build one row per customer summarising their transaction history:
# summed TotalValue, number of TransactionID entries, and summed Quantity.
# Output columns keep the source column names; CustomerID is restored as a column.
transactions_by_customer = transactions.groupby('CustomerID')
customer_transactions = transactions_by_customer.agg(
    TotalValue=('TotalValue', 'sum'),
    TransactionID=('TransactionID', 'count'),
    Quantity=('Quantity', 'sum'),
).reset_index()

In [5]:
# Give the aggregated columns names that describe what they now hold.
customer_transactions = customer_transactions.rename(columns={
    'TotalValue': 'TotalSpend',
    'TransactionID': 'TransactionCount',
    'Quantity': 'TotalQuantity',
})

In [6]:
# Merge customer data with aggregated transaction data.
# The left join keeps customers that have no transactions; their aggregate
# columns come back as NaN and are set to 0. Only the aggregate columns are
# filled so that missing values in customer attributes are not overwritten with 0.
aggregate_cols = ['TotalSpend', 'TransactionCount', 'TotalQuantity']
customer_data = pd.merge(customers, customer_transactions, on='CustomerID', how='left')
customer_data[aggregate_cols] = customer_data[aggregate_cols].fillna(0)

In [7]:
# Rescale the three aggregate columns with MinMaxScaler and store the result
# both as an array (scaled_features) and as new columns on customer_data.
raw_feature_cols = ['TotalSpend', 'TransactionCount', 'TotalQuantity']
scaled_feature_cols = ['ScaledSpend', 'ScaledCount', 'ScaledQuantity']

scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(customer_data[raw_feature_cols])
customer_data[scaled_feature_cols] = scaled_features

In [8]:
# Pairwise cosine similarity between all rows of scaled_features;
# entry [i, j] compares customer row i with customer row j.
similarity_matrix = cosine_similarity(X=scaled_features)

In [9]:
# Function to get top 3 similar customers
def get_top_3_similar(customers_df, similarity_matrix, customer_id):
    """Return the three customers most similar to ``customer_id``.

    Parameters
    ----------
    customers_df : pandas.DataFrame
        Frame with a ``CustomerID`` column. Row *positions* must line up with
        the rows/columns of ``similarity_matrix``; the frame's index labels
        are not used.
    similarity_matrix : array-like
        Square matrix where ``similarity_matrix[i][j]`` is the similarity
        between the customers at row positions ``i`` and ``j``.
    customer_id
        Value to look up in ``CustomerID``. If it occurs more than once the
        first matching row is used.

    Returns
    -------
    list of tuple
        Up to three ``(CustomerID, score)`` pairs ordered from highest to
        lowest score. The queried customer is never included. Equal scores
        keep their row order.

    Raises
    ------
    IndexError
        If ``customer_id`` is not present in ``customers_df``.
    """
    id_column = customers_df['CustomerID']

    # Work with the row position, not the index label, so the lookup stays
    # correct when customers_df does not carry a default 0..n-1 index.
    matches = np.flatnonzero((id_column == customer_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"CustomerID {customer_id!r} not found in customers_df")
    position = int(matches[0])

    scores = np.asarray(similarity_matrix[position])

    # Rank from most to least similar, then drop the customer itself explicitly.
    # Self is not guaranteed to sort first: an all-zero feature row has
    # similarity 0 with itself, and other customers can tie at the top score.
    ranked = np.argsort(-scores, kind='stable')
    top_positions = ranked[ranked != position][:3]

    similar_ids = id_column.to_numpy()[top_positions]
    return list(zip(similar_ids, scores[top_positions]))

In [10]:
# Map each of the first 20 customer IDs to its three closest lookalikes.
first_20_ids = customer_data['CustomerID'].iloc[:20]
lookalike_results = {
    customer_id: get_top_3_similar(customer_data, similarity_matrix, customer_id)
    for customer_id in first_20_ids
}

In [12]:
# Save to Utkarsh_Shrivastawa_Lookalike.csv
# One row per customer: the customer ID, then the list of
# (similar customer ID, similarity score) pairs.
with open('Utkarsh_Shrivastawa_Lookalike.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['CustomerID', 'SimilarCustomers'])
    for cust_id, matches in lookalike_results.items():
        # csv.writer turns the list into text via repr(); NumPy >= 2 renders
        # scalars as "np.float64(...)", so convert to built-in types first to
        # keep the cell contents plain.
        plain_matches = [
            (
                match_id.item() if isinstance(match_id, np.generic) else match_id,
                float(score),
            )
            for match_id, score in matches
        ]
        writer.writerow([cust_id, plain_matches])

print("Lookalike.csv generated successfully!")

Lookalike.csv generated successfully!


In [14]:
# Read back the file written by the save cell (same relative path) so the
# preview reflects this run's output rather than a copy stored elsewhere.
Utkarsh_Shrivastawa_Lookalike = pd.read_csv('Utkarsh_Shrivastawa_Lookalike.csv')

In [15]:
# Preview the first five rows of the saved lookalike table.
Utkarsh_Shrivastawa_Lookalike.head(n=5)

Unnamed: 0,CustomerID,SimilarCustomers
0,C0001,"[('C0005', 0.9998846535342246), ('C0131', 0.99..."
1,C0002,"[('C0029', 0.9999112006371909), ('C0031', 0.99..."
2,C0003,"[('C0136', 0.9998174786510949), ('C0073', 0.99..."
3,C0004,"[('C0195', 0.9999669500657861), ('C0039', 0.99..."
4,C0005,"[('C0067', 0.9999973395785413), ('C0108', 0.99..."
