In [13]:
# Importing necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three input tables from disk.
customers = pd.read_csv("D:/Practicals/Customers.csv")
products = pd.read_csv("D:/Practicals/Products.csv")
transactions = pd.read_csv("D:/Practicals/Transactions.csv")

# Enrich every transaction with its customer row, then with its product row.
# Both joins are left joins, so a transaction is kept even when the matching
# customer or product is missing.
merged_data = pd.merge(
    pd.merge(transactions, customers, how="left", on="CustomerID"),
    products,
    how="left",
    on="ProductID",
)

# Collapse the merged transaction table into one profile row per customer.
# NOTE(review): "Price_x" is presumably the transaction-side Price column
# (pandas appends "_x" when both merge inputs share a column name) -- confirm
# against the input schemas.
customer_profiles = (
    merged_data.groupby("CustomerID")
    .agg(
        Quantity=("Quantity", "sum"),      # total units bought
        TotalValue=("TotalValue", "sum"),  # total amount spent
        Price_x=("Price_x", "mean"),       # mean price over the customer's rows
        Region=("Region", "first"),        # region taken from the first row
    )
    .reset_index()
)

# Turn Region into indicator columns; drop_first=True omits one level so the
# remaining indicators are not redundant with each other.
customer_profiles = pd.get_dummies(
    customer_profiles,
    columns=["Region"],
    drop_first=True,
)

# Standardize the numeric columns. The Region indicator columns are left
# unscaled.
numerical_features = ["Quantity", "TotalValue", "Price_x"]
scaler = StandardScaler()
scaler.fit(customer_profiles[numerical_features])
customer_profiles[numerical_features] = scaler.transform(
    customer_profiles[numerical_features]
)

# Compute cosine similarity between every pair of customer profiles.
# Row i / column j of the matrix follow the row order of customer_profiles,
# so positions can be mapped back to IDs through customer_ids.iloc.
customer_similarity = cosine_similarity(customer_profiles.drop("CustomerID", axis=1))

# Build lookalike recommendations (top 3 matches) for the first 20 customers.
lookalike_results = {}
customer_ids = customer_profiles["CustomerID"]
for i, customer_id in enumerate(customer_ids[:20]):
    # Exclude the customer's own entry by position instead of assuming it
    # sorts first: another customer can tie at the top score, and with a
    # stable sort an earlier-indexed tie would push the self-match into the
    # [1:4] window, recommending the customer to itself.
    similarity_scores = [
        (j, score) for j, score in enumerate(customer_similarity[i]) if j != i
    ]
    # Highest similarity first; keep the three best candidates.
    similar_customers = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[:3]
    lookalike_results[customer_id] = [
        # float() turns the NumPy scalar into a built-in float so the value
        # prints as 0.98 rather than np.float64(0.98) when the list is
        # stringified (e.g. on CSV export under NumPy 2).
        {"CustomerID": customer_ids.iloc[sim[0]], "Score": round(float(sim[1]), 2)}
        for sim in similar_customers
    ]

# Flatten the recommendation mapping into a two-column table: one row per
# customer, with the whole list of lookalike dicts held in a single cell.
lookalike_df = pd.DataFrame(
    [
        {"CustomerID": source_id, "Lookalikes": matches}
        for source_id, matches in lookalike_results.items()
    ]
)

# Persist the table without the index column. Each "Lookalikes" cell is
# written out as the string form of its list.
lookalike_df.to_csv("D:/Practicals/Lookalike.csv", index=False)

# Show the first five rows as a quick sanity check.
print(lookalike_df.head(5))



  CustomerID                                         Lookalikes
0      C0001  [{'CustomerID': 'C0137', 'Score': 0.98}, {'Cus...
1      C0002  [{'CustomerID': 'C0088', 'Score': 0.99}, {'Cus...
2      C0003  [{'CustomerID': 'C0190', 'Score': 0.98}, {'Cus...
3      C0004  [{'CustomerID': 'C0165', 'Score': 0.98}, {'Cus...
4      C0005  [{'CustomerID': 'C0128', 'Score': 1.0}, {'Cust...
