# Revenue Analysis

Analyze revenue performance across user segments.

In [None]:
# 📦 Import required libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
%matplotlib inline


## Revenue Breakdown and Aggregation

In [None]:
# Total revenue per session type, each type's percentage share of the overall
# total, ordered from the largest share to the smallest.
session_revenue = (
    df.groupby('Session Type')['Price Paid']
    .sum()
    .reset_index()
    .assign(**{'Revenue %': lambda totals: 100 * totals['Price Paid'] / totals['Price Paid'].sum()})
    .sort_values('Revenue %', ascending=False)
)

# Bar chart of the revenue share; x labels are rotated so long names stay readable.
plt.figure(figsize=(10, 6))
sns.barplot(data=session_revenue, x='Session Type', y='Revenue %', palette='viridis')
plt.title('Revenue Share by Session Type')
plt.xlabel('Session Type')
plt.ylabel('Revenue Share (%)')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# Last expression of the cell: the notebook renders the first rows of the table.
session_revenue.head()

In [None]:
# Build display-only (string-formatted) copies of the summary tables; the numeric
# originals are left untouched and are what gets written to Excel below.
formatted_group_stats = group_stats.copy()

# Revenue columns: dollar sign, thousands separators, no decimals.
revenue_cols = ['Total_Revenue', 'Avg_Revenue', 'Median_Revenue', 'Min_Revenue', 'Max_Revenue', 'Std_Revenue']
for col in revenue_cols:
    formatted_group_stats[col] = formatted_group_stats[col].map('${:,.0f}'.format)

# Transaction columns: one decimal place.
transaction_cols = ['Avg_Transactions', 'Median_Transactions', 'Min_Transactions', 'Max_Transactions', 'Std_Transactions']
for col in transaction_cols:
    formatted_group_stats[col] = formatted_group_stats[col].map('{:.1f}'.format)

# Revenue share: one decimal place followed by a percent sign.
formatted_group_stats['Revenue_Share'] = formatted_group_stats['Revenue_Share'].map('{:.1f}%'.format)

# Same treatment for the overall summary, driven by a column -> format table.
overall_formats = {
    'Total_Revenue': '${:,.0f}',
    'Avg_Revenue_per_User': '${:,.0f}',
    'Median_Revenue_per_User': '${:,.0f}',
    'Avg_Transactions_per_User': '{:.1f}',
    'Median_Transactions_per_User': '{:.1f}',
}
formatted_overall_summary = overall_summary.copy()
for summary_col, pattern in overall_formats.items():
    formatted_overall_summary[summary_col] = formatted_overall_summary[summary_col].map(pattern.format)

# Render both formatted tables as HTML and show them under their own headings.
overall_html = formatted_overall_summary.to_html(index=False, escape=False)
group_html = formatted_group_stats.to_html(index=False, escape=False)
full_html = f"<h2>Overall Summary</h2>{overall_html}<br><h2>Group-Level Summary</h2>{group_html}"
display(HTML(full_html))

# Export the unformatted numeric tables, one sheet each, so Excel keeps real numbers.
output_path = '/content/drive/MyDrive/SOF Data Analysis/user_summary_report.xlsx'
with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
    overall_summary.to_excel(writer, sheet_name='Overall Summary', index=False)
    group_stats.to_excel(writer, sheet_name='Group-Level Summary', index=False)

print(f"Excel file saved to: {output_path}")

In [None]:
# Revenue thresholds separating the Light / Medium / Heavy tiers.
LOW_CUTOFF = 190
HIGH_CUTOFF = 960


def classify_final(rev):
    """Return the tier label ('Heavy', 'Medium' or 'Light') for one revenue value.

    A value at or above HIGH_CUTOFF is 'Heavy'; otherwise a value at or above
    LOW_CUTOFF is 'Medium'; anything else is 'Light'. That includes values for
    which both comparisons are False, such as NaN. The cutoffs are read from
    the module-level constants each time the function is called.
    """
    # Thresholds are checked from highest to lowest; the first one met wins.
    tiers = ((HIGH_CUTOFF, 'Heavy'), (LOW_CUTOFF, 'Medium'))
    for threshold, label in tiers:
        if rev >= threshold:
            return label
    return 'Light'

# Label the entries of user_revenue by passing them through classify_final via .apply.
# NOTE(review): presumably user_revenue is a per-user revenue Series built in an
# earlier cell, so each value becomes 'Heavy', 'Medium' or 'Light' — confirm.
final_user_groups = user_revenue.apply(classify_final)

In [None]:
# Rank cutoff combinations by overall average revenue per user, i.e. total
# revenue divided by total users with all groups of a cutoff combined.
# NOTE(review): if the groups of every cutoff combination partition the same
# set of users, this ratio is identical for all cutoffs and the ranking carries
# no information — confirm against how all_results is built.

# Total revenue and total users per cutoff combination (all groups combined).
summary_per_cutoff = all_results.groupby('Cutoffs').agg(
    Total_Revenue_All=('Total_Revenue', 'sum'),
    Total_Users_All=('Num_Users', 'sum')
)

# Overall average revenue per user for each cutoff combination.
summary_per_cutoff['Avg_Revenue_per_User_Overall'] = (
    summary_per_cutoff['Total_Revenue_All'] / summary_per_cutoff['Total_Users_All']
)

# Top 10 cutoff combinations by that average, with 'Cutoffs' restored as a column.
top_cutoffs_by_avg_rev = (
    summary_per_cutoff
    .sort_values(by='Avg_Revenue_per_User_Overall', ascending=False)
    .head(10)
    .reset_index()
)

# ace_tools only exists inside the ChatGPT code sandbox. Elsewhere (Colab,
# Jupyter) the import fails, so fall back to the notebook's own display()
# instead of aborting the cell before the table is shown.
try:
    import ace_tools as tools
except ImportError:
    display(top_cutoffs_by_avg_rev)
else:
    tools.display_dataframe_to_user(name="Top Cutoffs by Avg Revenue per User", dataframe=top_cutoffs_by_avg_rev)