## Lookalike Model

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the raw datasets. `products` is loaded but not used in this section.
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Join every transaction to its customer row (inner join on CustomerID).
customer_transactions = pd.merge(transactions, customers, on='CustomerID')

# Collapse the transactions to one row of spending features per customer.
customer_agg = customer_transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',  # Total spending by the customer
    'Quantity': 'sum',    # Total quantity of products purchased
    'Price': 'mean',      # Average price of products purchased
}).reset_index()

# Attach the aggregates to the customer profile. This is an inner merge, so
# customers without any transaction do not appear in customer_profile.
customer_profile = pd.merge(customers, customer_agg, on='CustomerID')

# Region is a nominal category: integer codes would impose an arbitrary order
# and, being unscaled, would dominate the cosine angle. Build one indicator
# column per region for the similarity features instead.
region_features = pd.get_dummies(
    customer_profile['Region'], prefix='Region', dtype=float
)

# Keep the integer-coded Region column on customer_profile itself so the
# frame has the same columns and dtypes as before.
customer_profile['Region'] = customer_profile['Region'].astype('category').cat.codes

# Standardise the numeric features (zero mean, unit variance) so that no
# single spending feature dominates purely because of its scale.
numeric_features = ['TotalValue', 'Quantity', 'Price']
scaler = StandardScaler()
customer_profile[numeric_features] = scaler.fit_transform(
    customer_profile[numeric_features]
)

# Pairwise cosine similarity between customers. Row/column i of the matrix
# corresponds to row i (positional) of customer_profile.
feature_matrix = pd.concat(
    [region_features, customer_profile[numeric_features]], axis=1
)
similarity_matrix = cosine_similarity(feature_matrix)


In [2]:

# Function to get the top-N lookalikes for one customer
def get_lookalikes(customer_id, similarity_matrix, top_n=3):
    """
    Return the ``top_n`` customers most similar to ``customer_id``.

    Customer IDs are looked up in the module-level ``customer_profile``
    frame; row ``i`` of ``similarity_matrix`` must describe the customer in
    positional row ``i`` of that frame.

    Args:
        customer_id: Value to look up in ``customer_profile['CustomerID']``.
        similarity_matrix: Square, position-indexed matrix of pairwise
            similarity scores.
        top_n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(CustomerID, score)`` tuples ordered from most to least
        similar. The queried customer is never part of the result.

    Raises:
        IndexError: If ``customer_id`` is not present in ``customer_profile``.
    """
    customer_ids = customer_profile['CustomerID']

    # Resolve the customer to a *positional* row so the lookup stays aligned
    # with the matrix even when the frame does not carry a default index.
    positions = customer_ids.eq(customer_id).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(
            f"CustomerID {customer_id!r} not found in customer_profile"
        )
    customer_index = positions[0]

    # Drop the customer itself by position. Skipping the first sorted entry
    # is not reliable: another customer can tie for the top score and push
    # the self-match out of first place.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_matrix[customer_index])
        if position != customer_index
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    return [
        (customer_ids.iloc[position], score)
        for position, score in candidates[:top_n]
    ]

In [3]:
# Compute lookalikes for the first 20 rows of customer_profile
# (expected to be C0001 - C0020; this relies on the row order of the frame).
lookalike_results = {
    customer_id: get_lookalikes(customer_id, similarity_matrix)
    for customer_id in customer_profile['CustomerID'][:20]
}

# Flatten each customer's three (id, score) pairs into one wide row,
# matching the required CSV layout.
output_columns = ["CustomerID", "Similar1", "Score1", "Similar2", "Score2", "Similar3", "Score3"]
output_rows = []
for source_id, matches in lookalike_results.items():
    first, second, third = matches[0], matches[1], matches[2]
    output_rows.append(
        (source_id, first[0], first[1], second[0], second[1], third[0], third[1])
    )

lookalike_df = pd.DataFrame(output_rows, columns=output_columns)


In [4]:

# Write the lookalike table to disk under the required file name,
# leaving out the DataFrame index column.
output_file = "Vallem_Divya Sri_Lookalike.csv"
lookalike_df.to_csv(output_file, index=False)

# Confirm to the notebook user where the results were written.
confirmation = "Lookalike model results saved to 'Vallem_Divya Sri_Lookalike.csv'"
print(confirmation)

Lookalike model results saved to 'Vallem_Divya Sri_Lookalike.csv'
