<a href="https://colab.research.google.com/github/vidulaarumugam/Vidula_zeotap/blob/master/Vidula_N_A_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Convert 'SignupDate' and 'TransactionDate' to datetime
customers_df['SignupDate'] = pd.to_datetime(customers_df['SignupDate'])
transactions_df['TransactionDate'] = pd.to_datetime(transactions_df['TransactionDate'])


customers_features = customers_df[['CustomerID', 'Region', 'SignupDate']]


transaction_data = transactions_df.groupby('CustomerID').agg(
    total_spend=('TotalValue', 'sum'),
    transaction_count=('TransactionID', 'count'),
    average_spend=('TotalValue', 'mean')
).reset_index()

# Merge customer profile and transaction data
customer_data = pd.merge(customers_features, transaction_data, on='CustomerID')


customer_data['days_since_signup'] = (pd.to_datetime('today') - customer_data['SignupDate']).dt.days

# One-hot encode the 'Region'
customer_data = pd.get_dummies(customer_data, columns=['Region'], drop_first=True)

# Now select only the relevant features
features_to_scale = ['days_since_signup', 'total_spend', 'transaction_count', 'average_spend']

# Scaling the selected features for similarity comparison
scaler = StandardScaler()
scaled_data = scaler.fit_transform(customer_data[features_to_scale])

# Create a DataFrame with the scaled data
scaled_customer_data = pd.DataFrame(scaled_data, columns=features_to_scale)
scaled_customer_data['CustomerID'] = customer_data['CustomerID']

# Merge the scaled data with the original one-hot encoded customer data
scaled_customer_data = pd.merge(scaled_customer_data, customer_data.drop(columns=['SignupDate', 'total_spend', 'transaction_count', 'average_spend']), on='CustomerID')


In [9]:

similarity_matrix = cosine_similarity(scaled_customer_data.drop(columns=['CustomerID']))


similarity_df = pd.DataFrame(similarity_matrix, index=scaled_customer_data['CustomerID'], columns=scaled_customer_data['CustomerID'])


In [8]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler




lookalike_results = []

# Loop through the first 20 customers
for customer_id in scaled_customer_data['CustomerID'].iloc[:20]:
    # Calculate cosine similarity between the target customer and all other customers
    target_customer = scaled_customer_data[scaled_customer_data['CustomerID'] == customer_id].drop(columns=['CustomerID'])
    other_customers = scaled_customer_data[scaled_customer_data['CustomerID'] != customer_id].drop(columns=['CustomerID'])

    similarity_scores = cosine_similarity(target_customer, other_customers)[0]


    top_indices = np.argsort(similarity_scores)[-3:][::-1]
    top_customers = scaled_customer_data['CustomerID'].iloc[top_indices]

    # Store the customer ID and similarity scores
    for i, top_customer in enumerate(top_customers):
        lookalike_results.append([customer_id, top_customer, similarity_scores[top_indices[i]]])

lookalike_df = pd.DataFrame(lookalike_results, columns=['CustomerID', 'Lookalike_CustomerID', 'Similarity_Score'])

print("Lookalike Recommendations:")
print(lookalike_df.head(20))

# Save the results
lookalike_df.to_csv('Lookalike.csv', index=False)




print("\nLookalike recommendations have been saved to 'Lookalike.csv'.")


Lookalike Recommendations:
   CustomerID Lookalike_CustomerID  Similarity_Score
0       C0001                C0151          1.000000
1       C0001                C0010          1.000000
2       C0001                C0117          1.000000
3       C0002                C0133          1.000000
4       C0002                C0105          1.000000
5       C0002                C0026          1.000000
6       C0003                C0051          1.000000
7       C0003                C0136          0.999998
8       C0003                C0190          0.999998
9       C0004                C0112          1.000000
10      C0004                C0107          1.000000
11      C0004                C0101          1.000000
12      C0005                C0158          1.000000
13      C0005                C0026          1.000000
14      C0005                C0006          1.000000
15      C0006                C0167          0.999996
16      C0006                C0147          0.999996
17      C0006      