# Step 1: Load the datasets

In [30]:
import pandas as pd
# Read both input tables from CSV files in the working directory.
transaction_data = pd.read_csv(
    'transaction_data.csv',
)
purchase_behaviour = pd.read_csv(
    'purchase_behaviour.csv',
)

# Step 2: Merge datasets

In [31]:
# Left join: keep every transaction row and attach the purchase-behaviour
# columns that share the same LYLTY_CARD_NBR.
merged_data = transaction_data.merge(
    purchase_behaviour,
    how='left',
    on='LYLTY_CARD_NBR',
)

# Step 3: Calculate total sales

In [32]:
# Sales value per product: sum TOT_SALES within each PROD_NAME, keeping the
# product name as an ordinary column.
product_sales = merged_data.groupby('PROD_NAME', as_index=False)['TOT_SALES'].sum()

# The three products with the highest summed TOT_SALES.
top_3_products = product_sales.nlargest(3, 'TOT_SALES')

# Units per product, computed from the raw transactions rather than the merged frame.
product_quantity = transaction_data.groupby('PROD_NAME', as_index=False)['PROD_QTY'].sum()
best_selling_product = product_quantity.nlargest(1, 'PROD_QTY')

# Unpack the single winning row into scalars for reporting.
# NOTE: despite its name, `total_sales` holds the summed PROD_QTY (units sold),
# not a sales value.
best_seller_row = best_selling_product.iloc[0]
product_name = best_seller_row['PROD_NAME']
total_sales = best_seller_row['PROD_QTY']


Calculate the total sales and transaction counts for each customer segment

In [34]:
# For each (LIFESTAGE, PREMIUM_CUSTOMER) segment: summed TOT_SALES and the
# number of distinct TXN_ID values, with the segment keys restored as columns.
segment_groups = merged_data.groupby(['LIFESTAGE', 'PREMIUM_CUSTOMER'])
customer_segment_analysis = segment_groups.agg(
    TOT_SALES=('TOT_SALES', 'sum'),
    TXN_ID=('TXN_ID', 'nunique'),
).reset_index()

# Step 4: Identify loyal customers and analyze characteristics

In [35]:
# Order segments from highest to lowest TOT_SALES; the distinct-transaction
# count (TXN_ID) is the secondary, also descending, sort key.
loyal_customers = customer_segment_analysis.sort_values(
    ['TOT_SALES', 'TXN_ID'],
    ascending=[False, False],
)

In [37]:
# Print a plain-text heading, then leave the frame as the cell's last
# expression so the notebook renders it as a table.
# Note: `top_3_products` is ranked by summed TOT_SALES (sales value).
print("Top 3 Most Profitable Products:")
top_3_products

Top 3 Most Profitable Products:


Unnamed: 0,PROD_NAME,TOT_SALES
11,Dorito Corn Chp Supreme 380g,40352.0
86,Smiths Crnkle Chip Orgnl Big Bag 380g,36367.6
77,Smiths Crinkle Chips Salt & Vinegar 330g,34804.2


In [38]:
# Print a heading (preceded by a blank line), then leave the sorted segment
# table as the cell's last expression so the notebook renders it.
print("\nCharacteristics of Most Loyal Customers:")
loyal_customers


Characteristics of Most Loyal Customers:


Unnamed: 0,LIFESTAGE,PREMIUM_CUSTOMER,TOT_SALES,TXN_ID
6,OLDER FAMILIES,Budget,168363.25,22935
19,YOUNG SINGLES/COUPLES,Mainstream,157621.6,20783
13,RETIREES,Mainstream,155677.05,21363
15,YOUNG FAMILIES,Budget,139345.85,18970
9,OLDER SINGLES/COUPLES,Budget,136769.8,18301
10,OLDER SINGLES/COUPLES,Mainstream,133393.8,18194
11,OLDER SINGLES/COUPLES,Premium,132263.15,17654
12,RETIREES,Budget,113147.8,15113
7,OLDER FAMILIES,Mainstream,103445.55,14113
14,RETIREES,Premium,97646.05,13036


In [39]:
# Report the product with the most units sold. `total_sales` holds the summed
# PROD_QTY for that product, so the value is labelled as a quantity rather
# than as a sales amount.
print(f"Best-Selling Product Name: {product_name}, Total Quantity Sold: {total_sales}")

Best-Selling Product Name: Dorito Corn Chp     Supreme 380g, Total Sales: 6509


# Step 5: Hypothesis


 - Premium customers tend to spend more per transaction.
 - Customers in certain life stages (like "OLDER FAMILIES" or "YOUNG FAMILIES") might show higher transaction amounts due to family needs.
 - Loyal customers could also prefer specific high-quality or high-value products.
 - The best-selling product is likely popular due to several factors such as its price, quality, and demand among different customer segments.