# User Funnel Analysis - Exploratory Notebook

This notebook provides an interactive environment for exploring user funnel data and testing different analysis approaches.

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from funnel_analyzer import FunnelAnalyzer

# Display settings: a value of None lifts pandas' limits on the number of
# columns shown and on the output width, so wide frames are not truncated.
pd.options.display.max_columns = None
pd.options.display.width = None

## 1. Load and Explore Data

In [None]:
# Create the analyzer instance that every later cell in this notebook uses.
analyzer = FunnelAnalyzer()

# To analyze your own data, uncomment the call below and point it at your file.
# NOTE: the sample-data assignment further down in this cell overwrites
# analyzer.data, so remove that assignment when loading real data.
# analyzer.load_data('../data/your_funnel_data.csv')

# For demonstration, create sample data with 10k users
def create_sample_data(n_users=10_000, seed=42):
    """Build a synthetic funnel event log for demonstration purposes.

    Every user gets one ``page_view`` event, dated 0-89 days after
    2024-01-01, with a randomly chosen traffic source and device. Each later
    funnel step is only reachable from the previous one:

    * ``signup``: 65% of page views, 1-119 minutes after the page view
    * ``first_purchase``: 35% of signups, 1-71 hours after the signup
    * ``repeat_purchase``: 30% of first purchases, 1-20 days after it

    Args:
        n_users: Number of users to simulate; user IDs run from 1 to
            ``n_users``. Defaults to 10,000.
        seed: Value passed to ``np.random.seed`` so the output is
            reproducible. Pass ``None`` to leave NumPy's global random state
            untouched.

    Returns:
        A DataFrame with one row per event and the columns ``user_id``,
        ``event``, ``timestamp``, ``source`` and ``device``. The columns are
        present even when ``n_users`` is 0.

    Raises:
        ValueError: If ``n_users`` is negative.
    """
    if n_users < 0:
        raise ValueError(f"n_users must be non-negative, got {n_users}")
    if seed is not None:
        np.random.seed(seed)

    columns = ['user_id', 'event', 'timestamp', 'source', 'device']
    sources = ['organic', 'paid', 'social', 'email', 'direct']
    devices = ['desktop', 'mobile', 'tablet']
    first_day = pd.Timestamp('2024-01-01')

    # (event name, probability of reaching it from the previous step,
    #  Timedelta unit, inclusive low / exclusive high bound of the delay)
    later_steps = (
        ('signup', 0.65, 'minutes', 1, 120),
        ('first_purchase', 0.35, 'hours', 1, 72),
        ('repeat_purchase', 0.30, 'days', 1, 21),
    )

    events = []
    for user in range(1, n_users + 1):
        # The order of the random draws is significant: keeping it fixed is
        # what makes a given seed always produce the same data set.
        source = np.random.choice(sources)
        device = np.random.choice(devices)
        timestamp = first_day + pd.Timedelta(days=np.random.randint(0, 90))
        events.append({
            'user_id': user,
            'event': 'page_view',
            'timestamp': timestamp,
            'source': source,
            'device': device,
        })

        for event_name, probability, unit, low, high in later_steps:
            # A user who drops out at one step never reaches the later ones.
            if np.random.random() >= probability:
                break
            # Each step happens some delay after the user's previous event.
            timestamp = timestamp + pd.Timedelta(**{unit: np.random.randint(low, high)})
            events.append({
                'user_id': user,
                'event': event_name,
                'timestamp': timestamp,
                'source': source,
                'device': device,
            })

    # Explicit columns keep the schema stable even when no events exist.
    return pd.DataFrame(events, columns=columns)

# Load sample data: assign the generated event log directly to the analyzer,
# replacing whatever analyzer.data held before.
analyzer.data = create_sample_data()
# The user count in this message is hard-coded; it matches the 10,000 users
# that create_sample_data() generates.
print(f"Sample data created: {len(analyzer.data)} events from 10,000 users")
print(f"Data shape: {analyzer.data.shape}")
# Trailing expression: the notebook renders the first rows as the cell output.
analyzer.data.head()

## 2. Data Preprocessing

In [None]:
# Preprocess the data
analyzer.preprocess_data()

# Explore the data structure
print("Data Info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so
# it is called directly; wrapping it in print() would emit a stray "None".
analyzer.data.info()

# Show how many rows fall into each category of the key columns.
for heading, column in (
    ("Event", 'event'),
    ("Source", 'source'),
    ("Device", 'device'),
):
    print(f"\n{heading} Distribution:")
    print(analyzer.data[column].value_counts())

## 3. Funnel Analysis

In [None]:
# Funnel stages, listed in the order they are handed to the analyzer
funnel_steps = [
    'page_view',
    'signup',
    'first_purchase',
    'repeat_purchase',
]

# Run the funnel computation and print its result under a heading
funnel_data = analyzer.create_funnel_analysis(funnel_steps)
print("Funnel Analysis Results:", funnel_data, sep="\n")

## 4. Visualizations

In [None]:
# Create funnel chart; the string argument is the chart title.
# NOTE(review): presumably this plots the result of the create_funnel_analysis
# call in the previous section, so that cell must run first — confirm against
# FunnelAnalyzer.
funnel_chart = analyzer.plot_funnel_chart("10K User Conversion Funnel")
# Display the returned chart object in the cell output.
funnel_chart.show()

In [None]:
# Create conversion rate charts
# NOTE(review): presumably this also relies on the funnel computed by
# create_funnel_analysis earlier in the notebook — confirm against
# FunnelAnalyzer.
conversion_chart = analyzer.plot_conversion_rates()
# Display the returned chart object in the cell output.
conversion_chart.show()

## 5. Advanced Analysis

In [None]:
# Time-based analysis: reduce each timestamp to its calendar date, then count
# events per (date, event type). Note this adds a 'date' column to analyzer.data.
analyzer.data['date'] = analyzer.data['timestamp'].dt.date
events_per_day = analyzer.data.groupby(['date', 'event']).size()
# One row per date, one column per event type; missing combinations become 0.
daily_events = events_per_day.unstack(fill_value=0)

# Plot daily event trends, one line per event type
trend_frame = daily_events.reset_index()
fig = px.line(
    trend_frame,
    x='date',
    y=daily_events.columns,
    title='Daily Event Trends (10K Users)',
)
fig.show()

In [None]:
# User journey analysis: one row per user holding the list of that user's
# events, plus the number of events in the list.
events_by_user = analyzer.data.groupby('user_id')['event']
user_journeys = events_by_user.apply(list).reset_index()
user_journeys['journey_length'] = user_journeys['event'].map(len)

# How many users have a journey of each length, shortest first
length_counts = user_journeys['journey_length'].value_counts().sort_index()
print("Journey Length Distribution:")
print(length_counts)

# Plot journey length distribution
fig = px.histogram(
    user_journeys,
    x='journey_length',
    nbins=10,
    title='User Journey Length Distribution (10K Users)',
)
fig.show()

In [None]:
# Source performance analysis: count event rows per traffic source, with one
# column per event type (combinations that never occur are filled with 0).
events_per_source = analyzer.data.groupby(['source', 'event']).size()
source_funnel = events_per_source.unstack(fill_value=0)

# Conversion rate = first purchases as a percentage of page views. Both are
# row counts of events, not counts of distinct users.
source_purchase_share = source_funnel['first_purchase'].div(source_funnel['page_view'])
source_funnel['conversion_rate'] = source_purchase_share.mul(100).round(2)

print("Conversion Rate by Source:")
source_summary_columns = ['page_view', 'signup', 'first_purchase', 'conversion_rate']
ranked_sources = source_funnel[source_summary_columns].sort_values(
    'conversion_rate', ascending=False
)
print(ranked_sources)

# Plot source performance
fig = px.bar(
    source_funnel.reset_index(),
    x='source',
    y='conversion_rate',
    title='Conversion Rate by Traffic Source',
)
fig.show()

In [None]:
# Device performance analysis: count event rows per device type, with one
# column per event type (combinations that never occur are filled with 0).
events_per_device = analyzer.data.groupby(['device', 'event']).size()
device_funnel = events_per_device.unstack(fill_value=0)

# Conversion rate = first purchases as a percentage of page views. Both are
# row counts of events, not counts of distinct users.
device_purchase_share = device_funnel['first_purchase'].div(device_funnel['page_view'])
device_funnel['conversion_rate'] = device_purchase_share.mul(100).round(2)

print("Conversion Rate by Device:")
device_summary_columns = ['page_view', 'signup', 'first_purchase', 'conversion_rate']
ranked_devices = device_funnel[device_summary_columns].sort_values(
    'conversion_rate', ascending=False
)
print(ranked_devices)

# Plot device performance
fig = px.bar(
    device_funnel.reset_index(),
    x='device',
    y='conversion_rate',
    title='Conversion Rate by Device Type',
)
fig.show()