In [None]:

# Install necessary libraries
!pip install pandas matplotlib seaborn


In [None]:

# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:

# Upload customer_orders.csv and load it into a DataFrame.
import io

from google.colab import files

EXPECTED_FILENAME = 'customer_orders.csv'

# files.upload() opens a browser file picker and returns a dict that maps
# each uploaded file name to its raw bytes.
uploaded = files.upload()

# The returned key is not guaranteed to equal EXPECTED_FILENAME (the user may
# pick a differently named file, or a re-upload may come back under a renamed
# key such as "customer_orders (1).csv"), so fall back to the only uploaded
# file instead of failing with a bare KeyError.
if EXPECTED_FILENAME in uploaded:
    upload_name = EXPECTED_FILENAME
elif len(uploaded) == 1:
    upload_name = next(iter(uploaded))
else:
    raise KeyError(
        f"Expected an upload named {EXPECTED_FILENAME!r}, "
        f"but received: {sorted(uploaded)}"
    )

customer_orders = pd.read_csv(io.BytesIO(uploaded[upload_name]))

# Preview
customer_orders.head()


In [None]:

# Parse the order timestamps so the .dt accessors below are available
customer_orders['order_date'] = pd.to_datetime(customer_orders['order_date'])

# Cohort = calendar month of each customer's earliest order
first_order_date = customer_orders.groupby('customer_id')['order_date'].transform('min')
customer_orders['cohort_month'] = first_order_date.dt.to_period('M')

# Calendar month in which each individual order was placed
customer_orders['order_month'] = customer_orders['order_date'].dt.to_period('M')

# Cohort index = whole months between the cohort month and the order month
# (0 for orders placed in the customer's first purchase month)
order_period = customer_orders['order_month']
cohort_period = customer_orders['cohort_month']
year_gap = order_period.dt.year - cohort_period.dt.year
month_gap = order_period.dt.month - cohort_period.dt.month
customer_orders['cohort_index'] = year_gap * 12 + month_gap

# Preview the derived columns next to their inputs
preview_columns = ['customer_id', 'order_date', 'cohort_month', 'order_month', 'cohort_index']
customer_orders[preview_columns].head()


In [None]:

# Count the distinct customers who ordered in each (cohort_month, cohort_index) pair
cohort_keys = ['cohort_month', 'cohort_index']
active_customers = customer_orders.groupby(cohort_keys)['customer_id'].nunique()
cohort_data = active_customers.reset_index()

# Reshape to one row per cohort and one column per cohort_index;
# combinations with no orders show up as NaN
cohort_pivot = cohort_data.pivot(
    index='cohort_month',
    columns='cohort_index',
    values='customer_id',
)

# Display the cohort table
cohort_pivot


In [None]:

# Render the cohort table as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(cohort_pivot, annot=True, fmt='g', cmap='YlGnBu', ax=ax)

# Label the figure so it can be read on its own
ax.set_title('Customer Retention Table')
ax.set_xlabel('Months Since First Purchase')
ax.set_ylabel('Cohort (First Purchase Month)')
plt.show()
