In [1]:
import pandas as pd
import numpy as np
import random

# --- 1. GENERATE FAKE SAAS DATA ---
# Every tunable number of the simulation lives here instead of being buried in the loop.
FIRST_CUSTOMER_NUMBER = 1000                     # IDs run CUST_1000, CUST_1001, ...
PLAN_NAMES = ['Basic', 'Pro', 'Enterprise']
PLAN_MIX = [0.5, 0.3, 0.2]                       # share of customers on each plan
PLAN_PRICES = {'Basic': 29, 'Pro': 99, 'Enterprise': 299}
MEAN_SUPPORT_CALLS = 2                           # Poisson mean: average 2 calls
BASE_CHURN_PROB = 0.10                           # Base churn
ANGRY_CALL_THRESHOLD = 4                         # strictly more calls than this => "angry"
ANGRY_CHURN_UPLIFT = 0.40                        # Angry customers churn
ENTERPRISE_CHURN_DISCOUNT = 0.05                 # Enterprise customers usually stay


def generate_customer_columns(n_customers):
    """Simulate `n_customers` SaaS customers and return them as a dict of columns.

    Every column has exactly `n_customers` entries, including `Customer_ID`,
    which previously used a hard-coded range and broke the DataFrame
    constructor for any size other than 1000.

    Randomness comes from the global NumPy RNG, consumed in this order:
    plan, support calls, days active, then one churn draw per customer.
    Seed with `np.random.seed(...)` before calling for reproducible output.

    Parameters
    ----------
    n_customers : int
        Number of customers (rows) to simulate.

    Returns
    -------
    dict
        Keys, in order: 'Customer_ID', 'Subscription_Type', 'Monthly_Bill',
        'Total_Support_Calls', 'Days_Active', 'Churned'.
    """
    subscription_type = np.random.choice(PLAN_NAMES, n_customers, p=PLAN_MIX)
    support_calls = np.random.poisson(MEAN_SUPPORT_CALLS, n_customers)
    days_active = np.random.randint(30, 1000, n_customers)

    # The bill is fully determined by the plan.
    monthly_bill = [PLAN_PRICES[plan] for plan in subscription_type]

    # Logic: more support calls = higher chance of churn; Enterprise is stickier.
    # The adjustments are applied in the same order as a per-row
    # "base, += uplift, -= discount" so the floating-point values are identical.
    churn_prob = np.full(n_customers, BASE_CHURN_PROB)
    churn_prob[support_calls > ANGRY_CALL_THRESHOLD] += ANGRY_CHURN_UPLIFT
    churn_prob[subscription_type == 'Enterprise'] -= ENTERPRISE_CHURN_DISCOUNT

    # One np.random.choice call per customer (rather than a single vectorised
    # draw) keeps the random stream, and therefore the seeded dataset,
    # exactly the same as the row-by-row version of this generator.
    churned = [np.random.choice(['Yes', 'No'], p=[p, 1 - p]) for p in churn_prob]

    return {
        'Customer_ID': [
            f'CUST_{number}'
            for number in range(FIRST_CUSTOMER_NUMBER, FIRST_CUSTOMER_NUMBER + n_customers)
        ],
        'Subscription_Type': subscription_type,
        'Monthly_Bill': monthly_bill,
        'Total_Support_Calls': support_calls,
        'Days_Active': days_active,
        'Churned': churned,
    }


np.random.seed(42) # For reproducible results
n_customers = 1000

data = generate_customer_columns(n_customers)
df = pd.DataFrame(data)

# Cheap invariants so a bad edit to the generator fails here, not in a chart.
assert len(df) == n_customers
assert df['Customer_ID'].is_unique

# --- 2. PREVIEW DATA ---
# Sanity check on the generated frame: row count, column names, first rows.
# The status marker is the check-mark emoji; it had been stored as the
# mis-decoded byte sequence of that character, which printed as garbage.
print("✅ Dataset Generated Successfully!")
print(f"Total Rows: {df.shape[0]}")
print(f"Columns: {df.columns.tolist()}")
# Last expression of the cell, so the notebook renders it as a rich table.
df.head()

✅ Dataset Generated Successfully!
Total Rows: 1000
Columns: ['Customer_ID', 'Subscription_Type', 'Monthly_Bill', 'Total_Support_Calls', 'Days_Active', 'Churned']


Unnamed: 0,Customer_ID,Subscription_Type,Monthly_Bill,Total_Support_Calls,Days_Active,Churned
0,CUST_1000,Basic,29,1,945,No
1,CUST_1001,Enterprise,299,6,884,Yes
2,CUST_1002,Pro,99,1,667,No
3,CUST_1003,Pro,99,2,616,No
4,CUST_1004,Basic,29,2,706,No


In [2]:
import plotly.express as px

# --- CHART 1: The "Bleeding" Report (Overall Churn) ---
# Goal: Show how many customers we are losing.
# One slice per value of `Churned`; churned customers in red, retained in green.
fig1 = px.pie(
    data_frame=df,
    names='Churned',
    color='Churned',
    color_discrete_map=dict(Yes='red', No='green'),
    title='Overall Churn Rate (How many left?)',
)
fig1.show()

# --- CHART 2: The "Smoking Gun" (Support Calls vs Churn) ---
# Goal: Prove that angry customers (high support calls) are the ones leaving.
# Side-by-side bars per support-call count, one bar for each churn status.
fig2 = px.histogram(
    data_frame=df,
    x="Total_Support_Calls",
    color="Churned",
    color_discrete_map=dict(Yes='red', No='blue'),
    barmode="group",
    title="Why are they leaving? (Support Calls Analysis)",
)
fig2.show()

# --- CHART 3: Revenue Impact ---
# Goal: Are we losing Cheap (Basic) or Expensive (Enterprise) customers?
# Monthly_Bill is a single fixed price per plan, so a box plot of it collapses
# to flat lines and says nothing about churn. Instead, show for each plan the
# share of its customers who churned vs. stayed: barnorm="percent" normalises
# the bars within each plan, so plans of different sizes are comparable.
fig3 = px.histogram(
    df,
    x="Subscription_Type",
    color="Churned",
    barmode="group",
    barnorm="percent",
    # Order plans from cheapest to most expensive.
    category_orders={"Subscription_Type": ["Basic", "Pro", "Enterprise"]},
    title="Churn Distribution by Plan Type",
    color_discrete_map={'Yes':'red', 'No':'blue'},
)
fig3.update_yaxes(title_text="Share of customers in plan (%)")
fig3.show()

# 📊 Business Insights & Recommendations

### 1. The "Support" Problem
* **Observation:** Customers who call support **more than 4 times** churn at roughly **50%** (about 45% on Enterprise) — around five times the ~10% baseline.
* **Hypothesis:** These customers are frustrated with unresolved technical issues.

### 2. Pricing Strategy
* **Observation:** Enterprise customers ($299/mo) churn significantly *less* than Basic customers ($29/mo).
* **Action:** The sales team should focus on upselling Basic users to Enterprise plans to increase retention.

### 3. Immediate Recommendation
* **Alert System:** Flag any customer who makes their **4th support call** — the last call before churn risk jumps (it rises sharply once a customer exceeds 4 calls). Send a Senior Agent to resolve their issue immediately to prevent churn.