In [3]:
from pathlib import Path

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity


In [5]:
# Load datasets
# All three CSVs are read from one folder. DATA_DIR is the single place to edit
# when the notebook runs on another machine; its default is the original location,
# so the same files are loaded as before.
DATA_DIR = Path(r'C:\Users\ADMIN\Downloads')

customers = pd.read_csv(DATA_DIR / 'Customers.csv')
products = pd.read_csv(DATA_DIR / 'Products.csv')
transactions = pd.read_csv(DATA_DIR / 'Transactions.csv')

In [7]:
# Enrich every transaction with its customer columns, then with its product columns.
# Both joins rely on merge's default join type, so only transactions whose
# CustomerID and ProductID are found in the lookup tables are kept.
merged_df = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

In [9]:
# Customer x product matrix: one row per CustomerID, one column per ProductID,
# each cell holding the summed Quantity. Pairs with no purchases come out of the
# pivot as missing values and are replaced with 0.
customer_product_matrix = pd.pivot_table(
    merged_df,
    values='Quantity',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
).fillna(0)

In [11]:
# Pairwise cosine similarity between the rows of the customer x product matrix.
# The result is labelled with the matrix's CustomerID index on both axes so that
# scores can be looked up by customer ID.
similarity_matrix = pd.DataFrame(
    data=cosine_similarity(customer_product_matrix),
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

In [13]:
# Function to find top N similar customers
def get_top_n_similar(customers_list, similarity_df, n=3):
    """Return the ``n`` highest-scoring other customers for each requested customer.

    Parameters
    ----------
    customers_list : iterable
        Customer IDs to look up.
    similarity_df : pandas.DataFrame
        Similarity scores labelled by customer ID on both axes. A customer's
        scores are read from the column carrying its ID.
    n : int, default 3
        Maximum number of lookalikes returned per customer.

    Returns
    -------
    dict
        Maps each customer ID that was found to a list of
        ``(other_customer_id, score)`` tuples ordered from highest to lowest
        score. IDs that are not present on both axes of ``similarity_df`` are
        omitted from the result.
    """
    lookalikes = {}
    for cust_id in customers_list:
        # The scores come from the *column* named cust_id and the self-match is
        # removed by *row* label, so the ID has to exist on both axes. Checking
        # only the index would raise KeyError for an ID missing from the columns.
        if cust_id not in similarity_df.index or cust_id not in similarity_df.columns:
            continue
        # Drop the customer's own entry so it cannot be reported as its own lookalike.
        similar_customers = similarity_df[cust_id].drop(cust_id).nlargest(n)
        lookalikes[cust_id] = list(zip(similar_customers.index, similar_customers.values))
    return lookalikes


In [15]:
# Lookalikes for the first 20 rows of the customer table
# (expected to be C0001 - C0020; this assumes the file lists customers in ID order)
top_20_customers = customers['CustomerID'].head(20)
lookalike_results = get_top_n_similar(
    customers_list=top_20_customers,
    similarity_df=similarity_matrix,
)

In [17]:
# Flatten the {customer: [(lookalike_id, score), ...]} mapping into one wide row
# per customer, ready to be written out as CSV.
lookalike_df = pd.DataFrame([
    {
        'CustomerID': customer_id,
        'Lookalike1': first[0], 'Score1': first[1],
        'Lookalike2': second[0], 'Score2': second[1],
        'Lookalike3': third[0], 'Score3': third[1],
    }
    for customer_id, matches in lookalike_results.items()
    # Single-item loop used only to name the three (id, score) pairs inside the comprehension.
    for first, second, third in [(matches[0], matches[1], matches[2])]
])

In [19]:
# Write the lookalike table to disk; index=False leaves the row index out of the file.
lookalike_df.to_csv(path_or_buf='Lookalike.csv', index=False)
print("Lookalike.csv has been saved successfully!")

Lookalike.csv has been saved successfully!


In [21]:
# Bare expression: the notebook renders lookalike_df as this cell's output for a visual check.
lookalike_df

Unnamed: 0,CustomerID,Lookalike1,Score1,Lookalike2,Score2,Lookalike3,Score3
0,C0001,C0097,0.547723,C0194,0.469668,C0199,0.438178
1,C0002,C0030,0.372822,C0091,0.333914,C0071,0.329914
2,C0003,C0134,0.519947,C0181,0.517597,C0144,0.4
3,C0004,C0070,0.498888,C0132,0.438178,C0063,0.336067
4,C0005,C0096,0.648204,C0055,0.514496,C0064,0.332877
5,C0006,C0058,0.648886,C0040,0.629512,C0178,0.374634
6,C0007,C0020,0.588348,C0079,0.496139,C0026,0.363137
7,C0008,C0144,0.392232,C0088,0.339683,C0003,0.313786
8,C0009,C0140,0.560112,C0162,0.513265,C0062,0.495074
9,C0010,C0033,0.486664,C0077,0.424795,C0094,0.405906


In [23]:
# NOTE(review): this import sits in the last cell; consider moving it to the
# import cell at the top so all dependencies are visible in one place.
from IPython.display import FileLink
# Display a link to the CSV file written earlier in the notebook.
FileLink(r'Lookalike.csv')