# User Segmentation

Segment users into Heavy, Medium, or Light groups based on usage frequency (the number of sessions recorded per user).

In [None]:
# 📦 Import required libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
%matplotlib inline


## User Classification and Mapping

In [None]:
# Number of rows in df for each distinct 'Full Name'; this per-user count is
# what classify_user turns into a Heavy/Medium/Light label below.
user_counts = df['Full Name'].value_counts()

def classify_user(count, heavy_min=100, medium_min=25):
    """Label a user as 'Heavy', 'Medium', or 'Light' from a session count.

    Args:
        count: Number of sessions recorded for the user.
        heavy_min: Smallest count that is labelled 'Heavy' (default 100).
        medium_min: Smallest count that is labelled 'Medium' (default 25).

    Returns:
        'Heavy' when count >= heavy_min, 'Medium' when count >= medium_min,
        otherwise 'Light'. A NaN count fails both comparisons and therefore
        falls through to 'Light'.
    """
    if count >= heavy_min:
        return 'Heavy'
    if count >= medium_min:
        return 'Medium'
    return 'Light'

# Label each user once, then tag every session row with its user's label.
user_groups = user_counts.apply(classify_user)
df['User Group'] = df['Full Name'].map(user_counts).apply(classify_user)

# Session distribution by user group (one bar per group, fixed order).
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='User Group', order=['Heavy', 'Medium', 'Light'], ax=ax)
ax.set_title('Distribution of Sessions by User Group')
ax.set_xlabel('User Group')
ax.set_ylabel('Number of Sessions')
fig.tight_layout()
plt.show()

# Per-user transaction count and per-user revenue, each as a two-column frame.
transaction_counts = df['Full Name'].value_counts()
transaction_df = (
    transaction_counts
    .rename_axis('Full Name')
    .reset_index(name='Num_Transactions')
)
user_revenue_df = (
    df.groupby('Full Name')['Price Paid']
    .sum()
    .rename('Revenue')
    .reset_index()
)

# One row per user: revenue, transaction count, and the resulting group label.
user_data = pd.merge(user_revenue_df, transaction_df, on='Full Name')
user_data['User Group'] = user_data['Num_Transactions'].map(classify_user)

# Group-level descriptive statistics. The same five summary functions are
# applied to both Revenue and Num_Transactions; output column order is
# Num_Users, Total_Revenue, *_Revenue, *_Transactions.
summary_funcs = [
    ('Avg', 'mean'),
    ('Median', 'median'),
    ('Min', 'min'),
    ('Max', 'max'),
    ('Std', 'std'),
]
group_agg_spec = {
    'Num_Users': ('Full Name', 'count'),
    'Total_Revenue': ('Revenue', 'sum'),
}
group_agg_spec.update(
    {f'{label}_Revenue': ('Revenue', func) for label, func in summary_funcs}
)
group_agg_spec.update(
    {f'{label}_Transactions': ('Num_Transactions', func) for label, func in summary_funcs}
)
group_stats = user_data.groupby('User Group').agg(**group_agg_spec).reset_index()

# Each group's percentage of the revenue summed over all groups.
revenue_all_groups = group_stats['Total_Revenue'].sum()
group_stats['Revenue_Share'] = 100 * group_stats['Total_Revenue'] / revenue_all_groups
group_stats


In [None]:
# Display order used for every per-group series and plot in this section.
group_order = ['Heavy', 'Medium', 'Light']

# Step 1: sessions per user, and the label each count maps to.
user_counts = df['Full Name'].value_counts()
user_groups = user_counts.apply(classify_user)

# Step 2: write the label back onto every session row of df.
df['User Group'] = df['Full Name'].map(user_counts).apply(classify_user)

# Session distribution by user group.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='User Group', order=group_order, ax=ax)
ax.set_title('Distribution of Sessions by User Group')
ax.set_xlabel('User Group')
ax.set_ylabel('Number of Sessions')
fig.tight_layout()
plt.show()

# Step 3: total revenue per user.
user_revenue = df.groupby('Full Name')['Price Paid'].sum()

# Step 4: both Series are indexed by user name, so the constructor aligns them.
user_group_df = pd.DataFrame({
    'Revenue': user_revenue,
    'User Group': user_groups
})

# Steps 5-7: per-group revenue total, user count, and revenue per user.
# reindex fixes the row order; a group with no users shows up as NaN.
revenue_by_group = user_group_df.groupby('User Group')['Revenue'].sum()
group_revenue = revenue_by_group.reindex(group_order)
group_user_counts = user_group_df['User Group'].value_counts().reindex(group_order)
avg_revenue_per_user = group_revenue.div(group_user_counts)

# Step 8: bar chart of total revenue, annotated with count and average.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=group_revenue.index, y=group_revenue.values, ax=ax)

for bar_pos, group_name in enumerate(group_revenue.index):
    total = group_revenue[group_name]
    avg = avg_revenue_per_user[group_name]
    count = group_user_counts[group_name]
    # Place the label 1% above the top of the bar.
    ax.text(bar_pos, total + 0.01 * total, f"${total:,.0f}\n({count} users)\nAvg: ${avg:,.0f}",
            ha='center', va='bottom', fontsize=10)

ax.set_title('Total and Average Revenue by User Group')
ax.set_xlabel('User Group')
ax.set_ylabel('Total Revenue ($)')
fig.tight_layout()
plt.show()

# One row per user: revenue joined with transaction count, plus group label.
# Relies on user_revenue_df and transaction_df built in an earlier cell.
user_data = pd.merge(user_revenue_df, transaction_df, on='Full Name')
user_data['User Group'] = user_data['Num_Transactions'].map(classify_user)

# Group-level descriptive statistics: the same five summary functions are
# applied to Revenue and to Num_Transactions.
summary_funcs = [
    ('Avg', 'mean'),
    ('Median', 'median'),
    ('Min', 'min'),
    ('Max', 'max'),
    ('Std', 'std'),
]
group_agg_spec = {
    'Num_Users': ('Full Name', 'count'),
    'Total_Revenue': ('Revenue', 'sum'),
}
group_agg_spec.update(
    {f'{label}_Revenue': ('Revenue', func) for label, func in summary_funcs}
)
group_agg_spec.update(
    {f'{label}_Transactions': ('Num_Transactions', func) for label, func in summary_funcs}
)
group_stats = user_data.groupby('User Group').agg(**group_agg_spec).reset_index()

# Smallest group first, then each group's percentage of total revenue.
group_stats = group_stats.sort_values('Num_Users')
revenue_all_groups = group_stats['Total_Revenue'].sum()
group_stats['Revenue_Share'] = 100 * group_stats['Total_Revenue'] / revenue_all_groups

# Single-row summary across all users.
overall_row = {
    'Num_Users': user_data['Full Name'].nunique(),
    'Total_Revenue': user_data['Revenue'].sum(),
    'Avg_Revenue_per_User': user_data['Revenue'].mean(),
    'Median_Revenue_per_User': user_data['Revenue'].median(),
    'Avg_Transactions_per_User': user_data['Num_Transactions'].mean(),
    'Median_Transactions_per_User': user_data['Num_Transactions'].median(),
    'Total_Transactions': user_data['Num_Transactions'].sum(),
}
overall_summary = pd.DataFrame([overall_row])

# With a single row this share is the row's revenue over itself.
overall_summary['Revenue_Share'] = 100 * overall_summary['Total_Revenue'] / overall_summary['Total_Revenue'].sum()
from IPython.display import display, HTML

# Shared cell formatters: whole dollars, one decimal place, one-decimal percent.
fmt_currency = '${:,.0f}'.format
fmt_one_decimal = '{:.1f}'.format
fmt_percent = '{:.1f}%'.format

# Group-level table: format a copy so group_stats keeps its numeric columns.
formatted_group_stats = group_stats.copy()
revenue_cols = [
    'Total_Revenue', 'Avg_Revenue', 'Median_Revenue',
    'Min_Revenue', 'Max_Revenue', 'Std_Revenue'
]
for revenue_col in revenue_cols:
    formatted_group_stats[revenue_col] = formatted_group_stats[revenue_col].map(fmt_currency)

transaction_cols = [
    'Avg_Transactions', 'Median_Transactions',
    'Min_Transactions', 'Max_Transactions', 'Std_Transactions'
]
for col in transaction_cols:
    formatted_group_stats[col] = formatted_group_stats[col].map(fmt_one_decimal)

formatted_group_stats['Revenue_Share'] = formatted_group_stats['Revenue_Share'].map(fmt_percent)

# Overall table: same idea, driven by a column -> formatter mapping.
formatted_overall_summary = overall_summary.copy()
overall_formatters = {
    'Total_Revenue': fmt_currency,
    'Avg_Revenue_per_User': fmt_currency,
    'Median_Revenue_per_User': fmt_currency,
    'Avg_Transactions_per_User': fmt_one_decimal,
    'Median_Transactions_per_User': fmt_one_decimal,
    'Revenue_Share': fmt_percent,
}
for summary_col, formatter in overall_formatters.items():
    formatted_overall_summary[summary_col] = formatted_overall_summary[summary_col].map(formatter)

# Render both tables as HTML and show them under their own headings.
group_html = formatted_group_stats.to_html(index=False, escape=False)
overall_html = formatted_overall_summary.to_html(index=False, escape=False)

full_html = (
    "<h2>Overall Summary</h2>" + overall_html
    + "<br><h2>Group-Level Summary</h2>" + group_html
)

display(HTML(full_html))

In [None]:
# 2x2 overview of the group-level table `group_stats`:
#   top-left     number of users per group
#   top-right    each group's share of total revenue
#   bottom-left  revenue per user  (average bar, median bar, std-dev error bar)
#   bottom-right transactions per user (same three statistics)

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('User Group Summary Statistics (Avg, Median, Std)', fontsize=16)

# Plot 1: Number of Users
sns.barplot(x='User Group', y='Num_Users', data=group_stats, ax=axes[0, 0])
axes[0, 0].set_title('Number of Users')
axes[0, 0].set_ylabel('Users')

# Plot 2: Revenue Share
sns.barplot(x='User Group', y='Revenue_Share', data=group_stats, ax=axes[0, 1])
axes[0, 1].set_title('Revenue Share (%)')
axes[0, 1].set_ylabel('Percentage')

# Plot 3: Revenue per User (Avg, Median, Std)
# The median bars are drawn over the average bars at the same x positions,
# semi-transparent so both remain visible.
axes[1, 0].bar(group_stats['User Group'], group_stats['Avg_Revenue'], label='Average')
axes[1, 0].bar(group_stats['User Group'], group_stats['Median_Revenue'], alpha=0.7, label='Median')
axes[1, 0].errorbar(group_stats['User Group'], group_stats['Avg_Revenue'],
                   yerr=group_stats['Std_Revenue'], fmt='o', color='black', capsize=5, label='Std Dev')
axes[1, 0].set_title('Revenue per User')
axes[1, 0].set_ylabel('Revenue ($)')
axes[1, 0].legend()

# Plot 4: Transactions per User (Avg, Median, Std)
axes[1, 1].bar(group_stats['User Group'], group_stats['Avg_Transactions'], label='Average')
axes[1, 1].bar(group_stats['User Group'], group_stats['Median_Transactions'], alpha=0.7, label='Median')
axes[1, 1].errorbar(group_stats['User Group'], group_stats['Avg_Transactions'],
                   yerr=group_stats['Std_Transactions'], fmt='o', color='black', capsize=5, label='Std Dev')
axes[1, 1].set_title('Transactions per User')
axes[1, 1].set_ylabel('Transactions')
axes[1, 1].legend()

# Final layout
for ax in axes.flat:
    ax.set_xlabel('User Group')
    # Set the rotation through tick_params rather than re-assigning the tick
    # label texts: set_xticklabels(get_xticklabels()) on the string-category
    # axes above has no fixed tick locations, which can trigger a matplotlib
    # warning or blank the labels depending on the matplotlib version.
    ax.tick_params(axis='x', labelrotation=0)

# Leave room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plots 1-2: one seaborn bar chart per group-level metric.
# Each entry is (column in group_stats, figure title, y-axis label).
simple_bar_specs = [
    ('Num_Users', 'Number of Users by Group', 'Users'),
    ('Revenue_Share', 'Revenue Share (%) by Group', 'Percentage'),
]
for metric_col, chart_title, y_label in simple_bar_specs:
    plt.figure(figsize=(6, 4))
    sns.barplot(x='User Group', y=metric_col, data=group_stats)
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xlabel('User Group')
    plt.tight_layout()
    plt.show()

# Plots 3-4: average and median bars drawn at the same x positions, with the
# standard deviation as an error bar around the average.
# Each entry is (column suffix in group_stats, figure title, y-axis label).
spread_specs = [
    ('Revenue', 'Revenue per User', 'Revenue ($)'),
    ('Transactions', 'Transactions per User', 'Transactions'),
]
for suffix, chart_title, y_label in spread_specs:
    group_labels = group_stats['User Group']
    group_averages = group_stats[f'Avg_{suffix}']

    plt.figure(figsize=(6, 4))
    plt.bar(group_labels, group_averages, label='Average')
    plt.bar(group_labels, group_stats[f'Median_{suffix}'], alpha=0.7, label='Median')
    plt.errorbar(group_labels, group_averages,
                 yerr=group_stats[f'Std_{suffix}'], fmt='o', color='black', capsize=5, label='Std Dev')
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xlabel('User Group')
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Contingency table: rows are user groups, columns are session types,
# cells are session counts.
session_pref_table = pd.crosstab(df['User Group'], df['Session Type'])

# Divide each row by its own total so every group sums to 100%.
session_pref_pct = session_pref_table.div(session_pref_table.sum(axis=1), axis=0) * 100

# Stacked bar: one bar per user group, one segment per session type.
session_pref_pct.plot(kind='bar', stacked=True, figsize=(10, 6), colormap='tab20')
plt.title('Session Type Distribution by User Group')
plt.ylabel('Percentage of Sessions')
plt.xlabel('User Group')
plt.legend(title='Session Type', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# The boxplots need the per-user table `user_data` from an earlier cell.
# Only the "name is not defined" case is turned into the friendly message;
# any other failure (missing column, plotting error, interrupt) now surfaces
# instead of being misreported as a session reset.
try:
    user_data
except NameError:
    print("Session was reset and 'user_data' is no longer available. Please re-run the data preparation steps.")
else:
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # Revenue Distribution
    sns.boxplot(data=user_data, x='User Group', y='Revenue', ax=axes[0], palette='Set2')
    axes[0].set_title('Revenue Distribution by User Group')
    axes[0].set_ylabel('Revenue ($)')
    axes[0].set_xlabel('User Group')

    # Transaction Distribution
    sns.boxplot(data=user_data, x='User Group', y='Num_Transactions', ax=axes[1], palette='Set3')
    axes[1].set_title('Transaction Count Distribution by User Group')
    axes[1].set_ylabel('Number of Transactions')
    axes[1].set_xlabel('User Group')

    plt.tight_layout()
    plt.show()

In [None]:
# StandardScaler and KMeans are not imported by the notebook's import cell,
# so bring them into scope here; otherwise this cell fails with a NameError
# unless some other cell happened to import them first.
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Prepare features for clustering: standardise both columns so neither
# Revenue nor Num_Transactions dominates the distance computation.
features = user_data[['Revenue', 'Num_Transactions']]
scaler = StandardScaler()  # kept so the fitted scaling can be reused/inverted
scaled_features = scaler.fit_transform(features)

# Run K-Means with 3 clusters (fixed seed for reproducible labels)
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
user_data['KMeans_Cluster'] = kmeans.fit_predict(scaled_features)

# Plot clusters and compare with segment: colour encodes the K-Means cluster,
# marker style encodes the rule-based user group.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=user_data,
    x='Num_Transactions',
    y='Revenue',
    hue='KMeans_Cluster',
    palette='Set2',
    style='User Group',
    s=80
)
plt.title('K-Means Clusters vs User Group Segments')
plt.xlabel('Number of Transactions')
plt.ylabel('Revenue ($)')
plt.legend(title='KMeans Cluster / User Group', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# Cross-tab to compare KMeans vs original groupings
# (rows: rule-based group, columns: cluster id, cells: number of users)
comparison_table = pd.crosstab(user_data['User Group'], user_data['KMeans_Cluster'])
comparison_table

In [None]:
# The cluster centres live in standardised feature space. Refit a scaler on
# the same `features` frame to map them back to dollars / transaction counts.
feature_scaler = StandardScaler().fit(features)
centroids = kmeans.cluster_centers_
centroids_unscaled = feature_scaler.inverse_transform(centroids)

# Column order matches the order of the columns in `features`.
centroids_df = pd.DataFrame(centroids_unscaled, columns=['Revenue', 'Num_Transactions'])

# Scatter of users (colour = cluster, marker = rule-based group) ...
cluster_scatter_options = dict(
    data=user_data,
    x='Num_Transactions',
    y='Revenue',
    hue='KMeans_Cluster',
    style='User Group',
    palette='Set2',
    s=80,
)
# ... with the centroids overlaid as large black X markers.
centroid_marker_options = dict(s=200, c='black', marker='X', label='Centroid')

plt.figure(figsize=(10, 6))
sns.scatterplot(**cluster_scatter_options)
plt.scatter(
    centroids_df['Num_Transactions'],
    centroids_df['Revenue'],
    **centroid_marker_options
)
plt.title('K-Means Clusters with Centroids vs User Group Segments')
plt.xlabel('Number of Transactions')
plt.ylabel('Revenue ($)')
plt.legend(title='KMeans Cluster / User Group', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# Centroid coordinates in original units, for reference.
centroids_df.round(2)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Build RFM table: one row per user with
#   Recency   - days between the user's last session and the latest session overall
#   Frequency - number of non-null 'Session Date' rows for the user
#   Monetary  - total 'Price Paid'
# NOTE(review): the subtraction assumes 'Session Date' is datetime-typed so
# that the difference has a `.days` attribute - confirm where df is loaded.
latest_date = df['Session Date'].max()
rfm = df.groupby('Full Name').agg(
    Recency=('Session Date', lambda x: (latest_date - x.max()).days),
    Frequency=('Session Date', 'count'),
    Monetary=('Price Paid', 'sum')
).reset_index()

# Correlation matrix
rfm_corr = rfm[['Recency', 'Frequency', 'Monetary']].corr()
plt.figure(figsize=(6, 4))
sns.heatmap(rfm_corr, annot=True, cmap='coolwarm', fmt=".2f", vmin=-1, vmax=1)
plt.title('Correlation Matrix: RFM Features')
plt.tight_layout()
plt.show()

# Merge User Group info (one label per user, taken from the session-level df)
user_groups = df[['Full Name', 'User Group']].drop_duplicates()
rfm = rfm.merge(user_groups, on='Full Name', how='left')
rfm['Is_Heavy_User'] = (rfm['User Group'] == 'Heavy').astype(int)

# Prepare training data
# NOTE(review): if df['User Group'] was derived from per-user session counts
# (as in the classification cells of this notebook), Frequency encodes the
# label almost directly, so the scores below mostly reflect that leakage
# rather than predictive power.
X = rfm[['Recency', 'Frequency', 'Monetary']]
y = rfm['Is_Heavy_User']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Evaluation
# Pin both classes so the matrix is always 2x2: without `labels`, a test split
# that contains only one class yields a 1x1 matrix that no longer matches the
# two tick labels used in the heatmap below.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", cm)

# Optional: Visualize confusion matrix (rows: actual, columns: predicted)
plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=['Not Heavy', 'Heavy'], yticklabels=['Not Heavy', 'Heavy'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix: Heavy User Classifier')
plt.tight_layout()
plt.show()

In [None]:
# Number of rows in DF_play for each distinct 'Full Name'; replaces the
# df-based user_counts computed in earlier cells.
user_counts = DF_play['Full Name'].value_counts()
def classify_user(count, heavy_min=100, medium_min=10):
    """Label a user as 'Heavy', 'Medium', or 'Light' from a session count.

    Args:
        count: Number of sessions recorded for the user.
        heavy_min: Smallest count that is labelled 'Heavy' (default 100).
        medium_min: Smallest count that is labelled 'Medium' (default 10).

    Returns:
        'Heavy' when count >= heavy_min, 'Medium' when count >= medium_min,
        otherwise 'Light'. A NaN count fails both comparisons and therefore
        falls through to 'Light'.
    """
    if count >= heavy_min:
        return 'Heavy'
    if count >= medium_min:
        return 'Medium'
    return 'Light'

# Label each user, report how many users land in each group, then tag every
# DF_play row with its user's label.
user_groups = user_counts.apply(classify_user)
users_per_group = user_groups.value_counts()
print(users_per_group)
DF_play['User Group'] = DF_play['Full Name'].map(user_counts).apply(classify_user)

# Session distribution by user group.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=DF_play, x='User Group', order=['Heavy', 'Medium', 'Light'], ax=ax)
ax.set_title('Distribution of Sessions by User Group')
ax.set_xlabel('User Group')
ax.set_ylabel('Number of Sessions')
fig.tight_layout()
plt.show()

In [None]:


# Display order used for the per-group series and the bar chart.
group_order = ['Heavy', 'Medium', 'Light']

# Sessions per user and the label each count maps to.
user_counts = DF_play['Full Name'].value_counts()
user_groups = user_counts.apply(classify_user)

# Total revenue per user.
user_revenue = DF_play.groupby('Full Name')['Price Paid'].sum()

# Both Series are indexed by user name, so the constructor aligns them.
user_group_df = pd.DataFrame({
    'Revenue': user_revenue,
    'User Group': user_groups
})

# Per-group revenue total, user count, and revenue per user.
# reindex fixes the row order; a group with no users shows up as NaN.
revenue_by_group = user_group_df.groupby('User Group')['Revenue'].sum()
group_revenue = revenue_by_group.reindex(group_order)
group_user_counts = user_group_df['User Group'].value_counts().reindex(group_order)
avg_revenue_per_user = group_revenue.div(group_user_counts)

# Bar chart of total revenue by group.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=group_revenue.index, y=group_revenue.values, ax=ax)

# Annotate each bar with total revenue, user count, and average per user.
for bar_pos, group_name in enumerate(group_revenue.index):
    total = group_revenue[group_name]
    avg = avg_revenue_per_user[group_name]
    count = group_user_counts[group_name]
    # Place the label 1% above the top of the bar.
    ax.text(bar_pos, total + 0.01 * total, f"${total:,.0f}\n({count} users)\nAvg: ${avg:,.0f}",
            ha='center', va='bottom', fontsize=10)

ax.set_title('Total and Average Revenue by User Group')
ax.set_xlabel('User Group')
ax.set_ylabel('Total Revenue ($)')
fig.tight_layout()
plt.show()

In [None]:


# Step 1: Total revenue per user, highest spenders first.
user_revenue = DF_play.groupby('Full Name')['Price Paid'].sum().sort_values(ascending=False)

# Step 2: Grid search across revenue-based cutoffs
results = []

# Candidate cutoffs are revenue percentiles:
#   low  cutoff (Light/Medium boundary): 10th-75th percentile in steps of 5
#   high cutoff (Medium/Heavy boundary): 80th-95th percentile in steps of 5
low_range = np.percentile(user_revenue, np.arange(10, 80, 5))
high_range = np.percentile(user_revenue, np.arange(80, 100, 5))

# Loop-invariant: the raw revenue values used for the comparisons below.
revenue_values = user_revenue.to_numpy()

for low in low_range:
    for high in high_range:
        if high <= low:
            continue

        # Classify users based on revenue. np.select takes the first matching
        # condition, so the order Heavy -> Medium -> Light mirrors an
        # if/elif/else chain. This is done without defining a function named
        # classify_user, so the count-based classify_user used by the other
        # cells is no longer silently replaced when this cell runs.
        user_groups = pd.Series(
            np.select(
                [revenue_values >= high, revenue_values >= low],
                ['Heavy', 'Medium'],
                default='Light',
            ),
            index=user_revenue.index,
            name=user_revenue.name,
        )

        # Join classification back with revenue
        group_df = pd.DataFrame({'Revenue': user_revenue, 'Group': user_groups})

        # Summarize stats per group for this (low, high) pair
        summary = group_df.groupby('Group').agg(
            Total_Revenue=('Revenue', 'sum'),
            Num_Users=('Revenue', 'count')
        )
        summary['Avg_Revenue_per_User'] = summary['Total_Revenue'] / summary['Num_Users']
        summary['Total_Revenue_All'] = summary['Total_Revenue'].sum()
        summary['Revenue_Share'] = summary['Total_Revenue'] / summary['Total_Revenue_All']
        summary['Cutoff_Low'] = low
        summary['Cutoff_High'] = high
        summary['Cutoffs'] = f"{low:.2f}-{high:.2f}"

        # Keep only cutoff pairs that leave users in all three groups.
        if summary.shape[0] == 3:
            results.append(summary.reset_index())

# Step 3: Compile and analyze all results
if not results:
    # pd.concat([]) would raise a ValueError with an unhelpful message.
    raise ValueError(
        "No cutoff combination produced all three user groups; "
        "check that DF_play has enough users with distinct revenue values."
    )
all_results = pd.concat(results)

# Step 4: Pivot to see revenue share by group (one row per cutoff pair)
pivot_summary = all_results.pivot_table(
    index='Cutoffs',
    columns='Group',
    values='Revenue_Share'
).sort_values(by='Heavy', ascending=False)

# Show top 10 cutoff combinations (largest Heavy revenue share first)
print(pivot_summary.head(10))

In [None]:
# Create user-level revenue data (one row per user)
user_revenue_df = user_revenue.reset_index()
user_revenue_df.columns = ['Full Name', 'Revenue']

# Sort users by revenue (highest first) and split the ranked positions
# 0..n-1 into three consecutive, near-equal-sized bins.
user_revenue_df = user_revenue_df.sort_values(by='Revenue', ascending=False).reset_index(drop=True)
n = len(user_revenue_df)
bins = [0, n // 3, 2 * n // 3, n]

labels = ['Heavy', 'Medium', 'Light']
# Use left-closed bins [0, n//3), [n//3, 2n//3), [2n//3, n). With the default
# right-closed bins plus include_lowest, position n//3 and position 2n//3 fell
# into the earlier bin, so Heavy got one user too many and Light too few
# (e.g. 4/3/2 instead of 3/3/3 for nine users).
# NOTE: needs at least three users; fewer produce duplicate bin edges.
user_revenue_df['User Group'] = pd.cut(user_revenue_df.index, bins=bins, labels=labels, right=False)

# Calculate summary metrics. 'User Group' is categorical; observed=False keeps
# every label in the result and makes the grouping behaviour explicit.
group_summary = user_revenue_df.groupby('User Group', observed=False).agg(
    Num_Users=('Full Name', 'count'),
    Total_Revenue=('Revenue', 'sum'),
    Avg_Revenue_per_User=('Revenue', 'mean')
)

group_summary['Revenue_Percent'] = 100 * group_summary['Total_Revenue'] / group_summary['Total_Revenue'].sum()
group_summary.reset_index(inplace=True)

# Prepare for plotting
plt.figure(figsize=(10, 6))
sns.barplot(data=group_summary, x='User Group', y='Avg_Revenue_per_User')

# Annotate with % of total revenue; after reset_index the row index (0, 1, 2)
# is used as the x position of each label.
for index, row in group_summary.iterrows():
    plt.text(index, row['Avg_Revenue_per_User'] + 5,
             f"{row['Revenue_Percent']:.1f}% of revenue",
             ha='center', fontsize=10)

plt.title('User Groups by Avg Revenue per User\n(with % of Total Revenue)')
plt.ylabel('Average Revenue per User ($)')
plt.xlabel('User Group')
plt.tight_layout()
plt.show()

In [None]:
# Each DF_play row is one transaction, so counting rows per 'Full Name'
# gives the number of transactions per user.
transaction_counts = DF_play['Full Name'].value_counts()
transaction_df = (
    transaction_counts
    .rename_axis('Full Name')
    .reset_index(name='Num_Transactions')
)

# Attach the transaction count to the per-user revenue / user-group table.
user_revenue_merged = pd.merge(user_revenue_df, transaction_df, on='Full Name')

# Per-group metrics, then each group's percentage of total revenue.
detailed_summary = user_revenue_merged.groupby('User Group').agg(
    Num_Users=('Full Name', 'count'),
    Avg_Revenue_per_User=('Revenue', 'mean'),
    Avg_Transactions_per_User=('Num_Transactions', 'mean'),
    Total_Revenue=('Revenue', 'sum')
)
revenue_all_groups = detailed_summary['Total_Revenue'].sum()
detailed_summary['Revenue_Percent'] = 100 * detailed_summary['Total_Revenue'] / revenue_all_groups
detailed_summary = detailed_summary.reset_index()

# Bar chart of users per group.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=detailed_summary, x='User Group', y='Num_Users', ax=ax)

# Annotate each bar with average revenue, average transactions, and revenue
# share; the row position (0, 1, 2) doubles as the bar's x coordinate.
for group_row in detailed_summary.itertuples():
    annotation = (
        f"${group_row.Avg_Revenue_per_User:.0f} avg rev\n"
        f"{group_row.Avg_Transactions_per_User:.1f} txns/user\n"
        f"{group_row.Revenue_Percent:.1f}% of revenue"
    )
    ax.text(group_row.Index, group_row.Num_Users + 5, annotation, ha='center', fontsize=10)

ax.set_title('User Count by Group with Revenue and Transaction Stats')
ax.set_ylabel('Number of Users')
ax.set_xlabel('User Group')
fig.tight_layout()
plt.show()