# In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# Lookalike model: build one feature vector per customer from their purchase
# history, compare customers with cosine similarity, and export the closest
# matches for the first customers of the profile table to "Lookalike.csv".

# How many customers (taken from the start of the profile table) receive
# recommendations, and how many lookalikes are reported for each of them.
TARGET_CUSTOMER_COUNT = 20
TOP_N_LOOKALIKES = 3


def most_frequent(values):
    """Return the most frequent non-null entry of a pandas Series.

    Args:
        values: pandas Series to inspect.

    Returns:
        The entry with the highest count, or ``None`` when the Series holds
        no non-null entries (``idxmax`` on the empty count table would raise
        ``ValueError`` otherwise).
    """
    counts = values.value_counts()
    if counts.empty:
        return None
    return counts.idxmax()


def rank_lookalikes(similarity_row, own_index, customer_ids, top_n=TOP_N_LOOKALIKES):
    """Return the ``top_n`` customers most similar to the one at ``own_index``.

    Args:
        similarity_row: Similarity scores of one customer against every
            customer, positioned like ``customer_ids``.
        own_index: Position of that customer inside ``similarity_row``.
        customer_ids: Customer identifiers, one per position.
        top_n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(customer_id, score)`` tuples ordered from most to least
        similar, with each score rounded to 4 decimals as a built-in float.
        Fewer than ``top_n`` tuples are returned when there are not enough
        other customers.
    """
    # Drop the customer by position instead of assuming it sorts first: a tie
    # with an identical profile, or floating-point error in the self-score,
    # can place another customer ahead of it.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_row)
        if position != own_index
    ]
    # list.sort is stable, so equal scores keep their original relative order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    # float() turns NumPy scalars into built-in floats; otherwise str() of the
    # result shows "np.float64(...)" under NumPy >= 2.
    return [
        (customer_ids[position], round(float(score), 4))
        for position, score in candidates[:top_n]
    ]


# The guard lets the helpers above be imported without reading the CSV files.
# Run as a script or as a notebook cell, the statements below still execute at
# module level, so every name they assign remains a module-level global.
if __name__ == "__main__":
    # Load data
    customers_file = "C:/Users/Jinkala Venkat/Downloads/Customers.csv"
    products_file = "C:/Users/Jinkala Venkat/Downloads/Products.csv"
    transactions_file = "C:/Users/Jinkala Venkat/Downloads/Transactions.csv"

    customers = pd.read_csv(customers_file)
    products = pd.read_csv(products_file)
    transactions = pd.read_csv(transactions_file)

    # Merge datasets: left joins keep every transaction, even when its product
    # or customer is missing from the lookup tables.
    transactions_products = pd.merge(transactions, products, on="ProductID", how="left")
    full_data = pd.merge(transactions_products, customers, on="CustomerID", how="left")

    # Create customer profiles (one row per customer).
    # NOTE(review): "Price_y" is the name pandas gives the right-hand
    # (Products) column when both merged frames contain "Price" -- confirm
    # that Transactions.csv really carries its own "Price" column.
    customer_profiles = full_data.groupby("CustomerID").agg({
        "Region": "first",
        "SignupDate": "first",
        "Category": most_frequent,
        "Price_y": "mean",
        "TotalValue": "sum",
        "Quantity": "sum",
    }).reset_index()

    # Normalize numerical features so that no single column dominates the
    # cosine similarity purely because of its scale.
    scaler = MinMaxScaler()
    numerical_cols = ["Price_y", "TotalValue", "Quantity"]
    customer_profiles[numerical_cols] = scaler.fit_transform(customer_profiles[numerical_cols])

    # One-hot encode categorical features. dtype=float keeps the whole matrix
    # numeric; recent pandas versions would otherwise emit bool indicators.
    features = ["Region", "Category", "Price_y", "TotalValue", "Quantity"]
    encoded_profiles = pd.get_dummies(customer_profiles[features], dtype=float)

    # Compute similarity matrix (row i / column j compares customers i and j).
    similarity_matrix = cosine_similarity(encoded_profiles)

    # Generate lookalike recommendations for the first customers of the table.
    customer_ids = customer_profiles["CustomerID"].tolist()
    lookalike_map = {
        customer_id: rank_lookalikes(similarity_matrix[idx], idx, customer_ids)
        for idx, customer_id in enumerate(customer_ids[:TARGET_CUSTOMER_COUNT])
    }

    # Save results to CSV: one row per customer, with the lookalike list
    # stored as its string representation.
    lookalike_df = pd.DataFrame({
        "CustomerID": list(lookalike_map),
        "Lookalikes": [str(v) for v in lookalike_map.values()],
    })
    lookalike_df.to_csv("Lookalike.csv", index=False)
