# 🎯 DEBT Business Intelligence: Customer Analytics & ML

**Advanced customer analytics using machine learning for business insights**

Key analyses:
- Customer segmentation and lifetime value
- Churn prediction and retention strategies  
- Sales forecasting and revenue optimization
- Business KPI monitoring and alerting

In [None]:
# Import libraries for business analytics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import warnings
# Silence every warning category for the rest of the session to keep notebook
# output clean. NOTE(review): this also hides deprecation notices raised by the
# libraries imported above — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Confirmation banner printed once the cell has finished running.
print("✅ Business Analytics Environment Ready")

## 👥 Customer Data Generation & Analysis

In [None]:
# Configuration for the synthetic customer dataset: choose how many records to
# create and fix NumPy's global RNG seed so every run yields the same data.
n_customers = 5_000
np.random.seed(42)

def generate_business_customers(n):
    """Build a synthetic customer table with ``n`` rows.

    Every random draw comes from NumPy's global ``np.random`` state, so seeding
    it before the call makes the result reproducible. The draws happen in a
    fixed order (age, income, region, tenure, products, spend noise,
    satisfaction, support contacts, churn noise).

    Args:
        n: Number of customers to generate.

    Returns:
        pandas.DataFrame with one row per customer and the columns
        ``customer_id`` (1..n), ``age``, ``income``, ``region``,
        ``tenure_months``, ``num_products``, ``monthly_spend``,
        ``satisfaction_score``, ``support_contacts``, ``lifetime_value`` and
        ``churn`` (0/1 flag).
    """
    rng = np.random  # module-level generator; keeps the global seed in effect

    # Demographics, clipped to the ranges 18-75 years and 25k-250k income.
    age_years = rng.normal(42, 12, n).clip(18, 75)
    annual_income = rng.lognormal(10.8, 0.6, n).clip(25000, 250000)

    # Region is sampled with fixed, uneven weights.
    region_names = ['North', 'South', 'East', 'West']
    region_weights = [0.3, 0.25, 0.25, 0.2]
    region = rng.choice(region_names, n, p=region_weights)

    # Relationship depth: months as a customer (1-60) and products held (1-6).
    tenure = rng.poisson(18, n).clip(1, 60)
    product_count = rng.poisson(2.2, n).clip(1, 6)

    # Spend grows with product count and income, plus noise, clipped to 20-500.
    spend_noise = rng.normal(0, 30, n)
    spend = (50 * product_count + annual_income * 0.0008 + spend_noise).clip(20, 500)

    # Beta(8, 2) scaled to 0-10 is skewed towards high satisfaction.
    satisfaction = rng.beta(8, 2, n) * 10
    contacts = rng.poisson(1.8, n)

    # Lifetime value: spend over tenure, boosted by up to 50% for satisfaction.
    ltv = spend * tenure * (1 + satisfaction / 20)

    # Churn score: lower satisfaction, more support contacts, shorter tenure
    # and higher spend all push the score up; noise is added last.
    churn_noise = rng.normal(0, 0.1, n)
    churn_score = (
        -0.15 * (satisfaction - 7)
        + 0.08 * (contacts - 1.5)
        + -0.05 * (tenure - 18) / 12
        + 0.03 * (spend - 150) / 100
        + churn_noise
    )
    churned = (churn_score > 0.2).astype(int)

    columns = {
        'customer_id': range(1, n + 1),
        'age': age_years.round().astype(int),
        'income': annual_income.round().astype(int),
        'region': region,
        'tenure_months': tenure,
        'num_products': product_count,
        'monthly_spend': spend.round(2),
        'satisfaction_score': satisfaction.round(1),
        'support_contacts': contacts,
        'lifetime_value': ltv.round(2),
        'churn': churned,
    }
    return pd.DataFrame(columns)

# Build the synthetic customer table used by every later cell.
customers_df = generate_business_customers(n_customers)

# Headline numbers: record count, mean lifetime value and overall churn share.
dataset_summary = (
    f"📊 Generated {len(customers_df):,} customer records",
    f"📈 Average Customer Lifetime Value: ${customers_df['lifetime_value'].mean():,.2f}",
    f"⚠️ Churn Rate: {customers_df['churn'].mean():.1%}",
)
print("\n".join(dataset_summary))

# Last expression of the cell, so the notebook renders the first five rows.
customers_df.head()

## 🔍 Customer Segmentation Analysis

In [None]:
# Advanced customer segmentation using K-means
# Business labels and the per-cluster statistic used to choose the cluster each
# label goes to. Rules are applied in order and each cluster receives at most
# one label, so earlier rules win ties. These are heuristics, not ground truth.
_SEGMENT_LABEL_RULES = (
    ('At-Risk Customers', 'Churn_Rate', 'max'),
    ('High-Value Loyalists', 'Avg_LTV', 'max'),
    ('Premium Customers', 'Avg_Monthly_Spend', 'max'),
    ('New Customers', 'Avg_Tenure', 'min'),
    ('Budget Conscious', 'Avg_Monthly_Spend', 'min'),
)


def _assign_segment_labels(segment_analysis):
    """Map each cluster id to a business label based on the cluster's profile."""
    unlabeled = segment_analysis
    labels = {}
    for label, column, pick in _SEGMENT_LABEL_RULES:
        if unlabeled.empty:
            break
        stat = unlabeled[column]
        cluster_id = stat.idxmax() if pick == 'max' else stat.idxmin()
        labels[cluster_id] = label
        unlabeled = unlabeled.drop(index=cluster_id)
    # More clusters than rules: fall back to a neutral name.
    for cluster_id in unlabeled.index:
        labels[cluster_id] = f'Segment_{cluster_id}'
    return labels


def perform_customer_segmentation(df, n_clusters=5, random_state=42):
    """Cluster customers with K-means and summarise each cluster.

    Customers are clustered on standardised tenure, monthly spend, product
    count and satisfaction score. K-means cluster ids are arbitrary integers,
    so the business label of a cluster is chosen from its aggregated profile
    (highest churn -> "At-Risk Customers", highest LTV among the rest ->
    "High-Value Loyalists", and so on) rather than from the id itself.

    Args:
        df: Customer table containing ``customer_id``, ``tenure_months``,
            ``monthly_spend``, ``num_products``, ``satisfaction_score``,
            ``lifetime_value`` and ``churn``. It is not modified.
        n_clusters: Number of K-means clusters (default 5).
        random_state: Seed passed to ``KMeans`` for reproducible clusters.

    Returns:
        Tuple ``(df_seg, segment_analysis, kmeans, scaler)``:
        ``df_seg`` is a copy of ``df`` with an integer ``segment`` column;
        ``segment_analysis`` has one row per cluster, sorted by ``Avg_LTV``
        descending, with columns ``Count``, ``Avg_Tenure``,
        ``Avg_Monthly_Spend``, ``Avg_LTV``, ``Avg_Satisfaction``,
        ``Churn_Rate``, ``Avg_Products`` and ``Business_Label``;
        ``kmeans`` and ``scaler`` are the fitted estimators.
    """
    # Features for segmentation
    segment_features = ['tenure_months', 'monthly_spend', 'num_products', 'satisfaction_score']

    # Standardise so no feature dominates the Euclidean distance because of scale.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df[segment_features])

    # K-means clustering
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state, n_init=10)
    segments = kmeans.fit_predict(X_scaled)

    # Add segments to a copy so the caller's dataframe stays untouched.
    df_seg = df.copy()
    df_seg['segment'] = segments

    # Named aggregation ties every output column to its source explicitly
    # instead of relying on a positional rename.
    segment_analysis = df_seg.groupby('segment').agg(
        Count=('customer_id', 'count'),
        Avg_Tenure=('tenure_months', 'mean'),
        Avg_Monthly_Spend=('monthly_spend', 'mean'),
        Avg_LTV=('lifetime_value', 'mean'),
        Avg_Satisfaction=('satisfaction_score', 'mean'),
        Churn_Rate=('churn', 'mean'),
        Avg_Products=('num_products', 'mean'),
    ).round(2)

    # Sort by lifetime value and attach the profile-derived labels.
    segment_analysis = segment_analysis.sort_values('Avg_LTV', ascending=False)
    labels = _assign_segment_labels(segment_analysis)
    segment_analysis['Business_Label'] = [labels[i] for i in segment_analysis.index]

    return df_seg, segment_analysis, kmeans, scaler

# Perform segmentation
customers_segmented, segment_summary, kmeans_model, scaler_model = perform_customer_segmentation(customers_df)

print("🎯 Customer Segmentation Results:")
print(segment_summary)

# Visualize segments. Cluster ids are category codes, not quantities, so they
# are cast to strings for plotting only: Plotly Express maps numeric columns to
# a continuous colour scale and string columns to discrete colours with a
# legend entry per cluster. `customers_segmented` itself keeps integer ids.
segment_plot_df = customers_segmented.assign(
    segment=customers_segmented['segment'].astype(str)
)
fig = px.scatter(
    segment_plot_df,
    x='monthly_spend',
    y='lifetime_value',
    color='segment',
    size='satisfaction_score',
    hover_data=['tenure_months', 'num_products'],
    title='Business Customer Segmentation Analysis',
    labels={'monthly_spend': 'Monthly Spend ($)', 'lifetime_value': 'Customer Lifetime Value ($)'},
    # Keep the legend in numeric cluster order rather than order of appearance.
    category_orders={'segment': sorted(segment_plot_df['segment'].unique(), key=int)},
)

fig.show()

## ⚠️ Churn Prediction Model

In [None]:
# Build advanced churn prediction model
def _safe_ratio(numerator, denominator):
    """Divide two Series element-wise, yielding 0.0 where the denominator is 0."""
    # Plain division would give inf (or NaN for 0/0) on zero denominators, and
    # the classifier rejects non-finite feature values.
    return (numerator / denominator).where(denominator != 0, 0.0)


def build_churn_model(df):
    """Train and evaluate a random-forest churn classifier.

    Three ratio features are derived from the raw columns (spend per product,
    satisfaction per month of tenure, and monthly spend relative to monthly
    income). Rows whose denominator is zero get a ratio of 0.0 instead of an
    infinite value, so customers with zero tenure, products or income do not
    make training fail.

    Args:
        df: Customer table containing ``age``, ``income``, ``tenure_months``,
            ``monthly_spend``, ``satisfaction_score``, ``support_contacts``,
            ``num_products`` and the 0/1 target ``churn``. It is not modified.

    Returns:
        Tuple ``(rf_model, feature_importance, X_test, y_test, y_pred,
        y_pred_proba)``: the fitted classifier, a DataFrame of features sorted
        by importance (descending), the held-out 20% test split and its
        labels, the predicted labels, and the second column of
        ``predict_proba`` (the churn=1 probability when both classes are
        present in the training data).
    """
    # Raw features used directly by the model.
    features = ['age', 'income', 'tenure_months', 'monthly_spend',
                'satisfaction_score', 'support_contacts', 'num_products']

    # Add derived features on a copy so the caller's dataframe is untouched.
    df_model = df.copy()
    df_model['spend_per_product'] = _safe_ratio(
        df_model['monthly_spend'], df_model['num_products']
    )
    df_model['satisfaction_tenure_ratio'] = _safe_ratio(
        df_model['satisfaction_score'], df_model['tenure_months']
    )
    df_model['income_spend_ratio'] = _safe_ratio(
        df_model['monthly_spend'], df_model['income'] / 12
    )

    # Extended feature list
    extended_features = features + ['spend_per_product', 'satisfaction_tenure_ratio', 'income_spend_ratio']

    # Prepare data
    X = df_model[extended_features]
    y = df_model['churn']

    # Stratified split keeps the churn share equal in train and test sets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # class_weight='balanced' re-weights classes to offset churn imbalance.
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
    rf_model.fit(X_train, y_train)

    # Predictions
    y_pred = rf_model.predict(X_test)
    y_pred_proba = rf_model.predict_proba(X_test)[:, 1]

    # Feature importance, most influential first.
    feature_importance = pd.DataFrame({
        'feature': extended_features,
        'importance': rf_model.feature_importances_
    }).sort_values('importance', ascending=False)

    return rf_model, feature_importance, X_test, y_test, y_pred, y_pred_proba

# Train the churn classifier and keep every returned artefact under its own name.
(
    churn_model,
    feature_imp,
    X_test_churn,
    y_test_churn,
    y_pred_churn,
    y_pred_proba_churn,
) = build_churn_model(customers_df)

# Precision / recall / F1 per class on the held-out test split.
print("🤖 Churn Prediction Model Performance:")
print(classification_report(y_test_churn, y_pred_churn))

# Horizontal bar chart of the eight most important features.
top_features = feature_imp.iloc[:8]
fig = px.bar(
    top_features,
    x='importance',
    y='feature',
    orientation='h',
    title='Key Business Factors for Churn Prediction',
    labels={'importance': 'Feature Importance', 'feature': 'Business Factors'},
)
fig.show()

# Text view of the five strongest features.
print("\n📊 Top Business Risk Factors:")
print(feature_imp.iloc[:5])

## 💰 Revenue Analysis & Forecasting

In [None]:
# Business revenue analysis and forecasting
def revenue_analysis(df, segments_df=None):
    """Summarise revenue, churn exposure and regional performance.

    Args:
        df: Customer table containing ``monthly_spend``, ``tenure_months``,
            ``lifetime_value``, ``churn``, ``region`` and
            ``satisfaction_score``.
        segments_df: Optional table with a ``segment`` column plus
            ``monthly_spend``, ``lifetime_value`` and ``customer_id``, used for
            the per-segment breakdown. When omitted, ``df`` is used if it has
            a ``segment`` column; otherwise a module-level
            ``customers_segmented`` is used if one exists (the behaviour of
            earlier versions of this function).

    Returns:
        dict with keys ``total_revenue`` (sum of spend x tenure),
        ``monthly_revenue`` (sum of monthly spend), ``avg_customer_value``
        (mean lifetime value), ``churn_revenue_impact`` (monthly spend of
        churned customers x 12), ``regional_performance`` (DataFrame indexed
        by region) and ``revenue_by_segment`` (DataFrame indexed by segment,
        or ``None`` when no segment information is available).
    """
    # Calculate key business metrics
    total_revenue = (df['monthly_spend'] * df['tenure_months']).sum()
    monthly_revenue = df['monthly_spend'].sum()
    avg_customer_value = df['lifetime_value'].mean()

    # Resolve where segment information comes from: explicit argument first,
    # then the input itself, then the legacy module-level table.
    if segments_df is None:
        if 'segment' in df.columns:
            segments_df = df
        else:
            segments_df = globals().get('customers_segmented')

    # Revenue by segment
    revenue_by_segment = None
    if segments_df is not None:
        revenue_by_segment = segments_df.groupby('segment').agg({
            'monthly_spend': 'sum',
            'lifetime_value': ['sum', 'mean'],
            'customer_id': 'count'
        })

    # Churn impact analysis: a year of the churned customers' monthly spend.
    churned_customers = df[df['churn'] == 1]
    churn_revenue_impact = churned_customers['monthly_spend'].sum() * 12  # Annual impact

    # Regional performance
    regional_performance = df.groupby('region').agg({
        'monthly_spend': 'sum',
        'lifetime_value': 'mean',
        'churn': 'mean',
        'satisfaction_score': 'mean'
    }).round(2)

    return {
        'total_revenue': total_revenue,
        'monthly_revenue': monthly_revenue,
        'avg_customer_value': avg_customer_value,
        'churn_revenue_impact': churn_revenue_impact,
        'regional_performance': regional_performance,
        'revenue_by_segment': revenue_by_segment
    }

# Compute the revenue summary for the full customer base.
revenue_insights = revenue_analysis(customers_df)

# Headline revenue figures.
revenue_headlines = (
    "💼 Business Revenue Intelligence:",
    f"  📈 Total Customer Revenue: ${revenue_insights['total_revenue']:,.2f}",
    f"  📊 Monthly Recurring Revenue: ${revenue_insights['monthly_revenue']:,.2f}",
    f"  👤 Average Customer LTV: ${revenue_insights['avg_customer_value']:,.2f}",
    f"  ⚠️ Annual Churn Revenue Risk: ${revenue_insights['churn_revenue_impact']:,.2f}",
)
print("\n".join(revenue_headlines))

# Per-region table: total spend plus mean LTV, churn and satisfaction.
print("\n🗺️ Regional Business Performance:")
print(revenue_insights['regional_performance'])

# Treemap of lifetime value nested by region then segment, coloured by
# satisfaction score.
treemap_options = {
    'path': ['region', 'segment'],
    'values': 'lifetime_value',
    'color': 'satisfaction_score',
    'title': 'Business Revenue Distribution by Region & Segment',
}
fig = px.treemap(customers_segmented, **treemap_options)
fig.show()

# Box plot comparing the monthly-spend distribution across regions.
box_options = {
    'x': 'region',
    'y': 'monthly_spend',
    'color': 'region',
    'title': 'Monthly Spend Distribution by Business Region',
}
fig2 = px.box(customers_df, **box_options)
fig2.show()

## 🎯 Business KPI Dashboard

In [None]:
# Create comprehensive business KPI dashboard
def create_business_kpi_dashboard(df, segments_df):
    """Print an executive KPI summary and return the underlying figures.

    Args:
        df: Customer table containing ``monthly_spend``, ``lifetime_value``,
            ``churn``, ``satisfaction_score`` and ``support_contacts``.
        segments_df: Accepted for interface compatibility; not read by this
            function.

    Returns:
        dict mapping KPI name to value, in display order: ``Total Customers``,
        ``Monthly Revenue``, ``Avg Customer LTV``, ``Churn Rate``,
        ``Avg Satisfaction``, ``High-Value Customers`` (LTV above the 80th
        percentile) and ``At-Risk Customers`` (satisfaction below 6 or more
        than 3 support contacts).
    """
    ltv = df['lifetime_value']
    high_value_cutoff = ltv.quantile(0.8)
    at_risk_mask = (df['satisfaction_score'] < 6) | (df['support_contacts'] > 3)

    kpis = {
        'Total Customers': len(df),
        'Monthly Revenue': df['monthly_spend'].sum(),
        'Avg Customer LTV': ltv.mean(),
        'Churn Rate': df['churn'].mean(),
        'Avg Satisfaction': df['satisfaction_score'].mean(),
        'High-Value Customers': int((ltv > high_value_cutoff).sum()),
        'At-Risk Customers': int(at_risk_mask.sum()),
    }

    print("📊 Executive Business Intelligence Dashboard")
    print("=" * 50)

    # The display format is picked from the KPI name: money, percentage,
    # score out of ten, or a plain count.
    for kpi, value in kpis.items():
        if 'Revenue' in kpi or 'LTV' in kpi:
            line = f"{kpi:<25}: ${value:>12,.2f}"
        elif 'Rate' in kpi:
            line = f"{kpi:<25}: {value:>12.1%}"
        elif 'Satisfaction' in kpi:
            line = f"{kpi:<25}: {value:>12.1f}/10"
        else:
            line = f"{kpi:<25}: {value:>12,}"
        print(line)

    # Threshold checks: churn above 15%, satisfaction below 7.0, and at-risk
    # customers above 10% of the base.
    print("\n🚨 Business Intelligence Alerts:")
    alert_checks = (
        (kpis['Churn Rate'] > 0.15,
         "  ⚠️ HIGH CHURN ALERT: Churn rate exceeds 15% threshold"),
        (kpis['Avg Satisfaction'] < 7.0,
         "  ⚠️ SATISFACTION ALERT: Customer satisfaction below target"),
        (kpis['At-Risk Customers'] > len(df) * 0.1,
         "  ⚠️ RETENTION ALERT: High number of at-risk customers"),
    )
    for triggered, message in alert_checks:
        if triggered:
            print(message)

    # Static recommendations; they do not depend on the KPI values.
    print("\n💡 Strategic Business Recommendations:")
    recommendations = (
        "  1. Focus retention efforts on high-LTV at-risk customers",
        "  2. Implement proactive support for customers with 3+ contacts",
        "  3. Develop upselling campaigns for satisfied long-term customers",
        "  4. Investigate regional performance differences for optimization",
    )
    for recommendation in recommendations:
        print(recommendation)

    return kpis

# Generate dashboard
business_kpis = create_business_kpi_dashboard(customers_df, customers_segmented)

# Create final summary visualization: total monthly spend per region. The
# regional table aggregates every customer (churned and retained alike), so the
# trace is named for what it shows rather than 'Retained'.
fig = go.Figure(data=[
    go.Bar(name='Monthly Revenue', x=list(revenue_insights['regional_performance'].index),
           y=revenue_insights['regional_performance']['monthly_spend']),
])

fig.update_layout(
    title='Monthly Revenue by Business Region',
    xaxis_title='Region',
    yaxis_title='Monthly Revenue ($)'
)

fig.show()

print("\n✅ Business Intelligence Analysis Complete!")
print("📈 Use these insights to drive data-driven business decisions")