# User Funnel Analysis - Exploratory Notebook (Corrected)

This notebook provides an interactive environment for exploring user funnel data and testing different analysis approaches.

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from analysis.funnel_analyzer import FunnelAnalyzer
from analysis.cohort_analysis import CohortAnalyzer

# Display settings: remove pandas' column-count cap so wide DataFrames are not
# elided with "..." when printed in the cells below.
pd.set_option('display.max_columns', None)  # None = no limit on columns shown
# NOTE(review): per the pandas docs, width=None asks pandas to auto-detect the
# display width, which only works in a terminal — confirm the effect in Jupyter.
pd.set_option('display.width', None)

## 1. Load and Explore Data

In [None]:
# Initialize analyzer
analyzer = FunnelAnalyzer()

# Load the generated sample data.
# The broad `except Exception` is deliberate: this is the notebook's top-level
# boundary, and a failed load should produce a hint rather than a traceback.
try:
    analyzer.load_data('../data/large_sample_funnel_data.csv')
except Exception as e:
    print(f"Error loading data: {e}")
    print("Run the data generation script first")
else:
    print("Data loaded successfully")

# Display basic info
if analyzer.data is not None:
    print(f"Data shape: {analyzer.data.shape}")
    # Jupyter only auto-displays the last *top-level* expression of a cell;
    # an expression nested inside an `if` body is discarded, so the preview
    # has to be printed explicitly.
    print(analyzer.data.head())

## 2. Data Preprocessing

In [None]:
# Preprocess the data
if analyzer.data is not None:
    analyzer.preprocess_data()

    # Explore the data structure
    print("Data Info:")
    # DataFrame.info() writes its summary to stdout itself and returns None,
    # so wrapping it in print() would append a stray "None" line.
    analyzer.data.info()
    print("\nEvent Distribution:")
    print(analyzer.data['event'].value_counts())
    print("\nSource Distribution:")
    print(analyzer.data['source'].value_counts())
    print("\nDevice Distribution:")
    print(analyzer.data['device'].value_counts())

## 3. Funnel Analysis

In [None]:
# Ordered list of events that make up the funnel, first step to last
funnel_steps = [
    'page_view',
    'signup',
    'first_purchase',
    'repeat_purchase',
]

# Run the funnel analysis over those steps (skipped when no data was loaded)
if analyzer.data is not None:
    funnel_data = analyzer.create_funnel_analysis(funnel_steps)
    print("Funnel Analysis Results:", funnel_data, sep="\n")

## 4. Visualizations

In [None]:
# Create funnel chart
# Only runs when funnel results exist on the analyzer.
# NOTE(review): presumably `funnel_data` is populated by
# `create_funnel_analysis` in the previous section — confirm in FunnelAnalyzer.
if analyzer.funnel_data is not None:
    # The string argument is passed positionally; it reads as the chart title.
    funnel_chart = analyzer.plot_funnel_chart("User Conversion Funnel")
    # NOTE(review): assumes the returned object is never None and exposes
    # `.show()` (e.g. a Plotly figure) — confirm against plot_funnel_chart.
    funnel_chart.show()

In [None]:
# Create conversion rates chart
# Only runs when funnel results exist on the analyzer.
# NOTE(review): presumably `funnel_data` is populated by
# `create_funnel_analysis` in the Funnel Analysis section — confirm.
if analyzer.funnel_data is not None:
    conversion_chart = analyzer.plot_conversion_rates()
    # NOTE(review): assumes the returned object is never None and exposes
    # `.show()` (e.g. a Plotly figure) — confirm against plot_conversion_rates.
    conversion_chart.show()

## 5. Additional Analysis

In [None]:
# Source performance analysis
# Builds a source x event table of raw event counts, then derives the share of
# first purchases relative to page views for each source.
if analyzer.data is not None:
    source_analysis = analyzer.data.groupby(['source', 'event']).size().unstack(fill_value=0)
    if 'first_purchase' in source_analysis.columns and 'page_view' in source_analysis.columns:
        # unstack(fill_value=0) yields 0 page views for sources that never
        # logged one; mask those denominators so the rate is NaN rather
        # than a misleading `inf`.
        page_views = source_analysis['page_view']
        page_views = page_views.where(page_views != 0)
        source_analysis['conversion_rate'] = (source_analysis['first_purchase'] / page_views * 100).round(2)
    print("Source Performance:")
    print(source_analysis)

In [None]:
# Device performance analysis
# Builds a device x event table of raw event counts, then derives the share of
# first purchases relative to page views for each device.
if analyzer.data is not None:
    device_analysis = analyzer.data.groupby(['device', 'event']).size().unstack(fill_value=0)
    if 'first_purchase' in device_analysis.columns and 'page_view' in device_analysis.columns:
        # unstack(fill_value=0) yields 0 page views for devices that never
        # logged one; mask those denominators so the rate is NaN rather
        # than a misleading `inf`.
        page_views = device_analysis['page_view']
        page_views = page_views.where(page_views != 0)
        device_analysis['conversion_rate'] = (device_analysis['first_purchase'] / page_views * 100).round(2)
    print("Device Performance:")
    print(device_analysis)

## 6. Cohort Analysis

In [None]:
# Cohort analysis: build the cohort results from the loaded data, print the
# retention table, then render whichever plots the analyzer returns.
if analyzer.data is not None:
    cohort_analyzer = CohortAnalyzer(analyzer.data)
    cohort_data = cohort_analyzer.create_cohort_analysis()

    # A falsy result skips all reporting for this section
    if cohort_data:
        print("Cohort Analysis Results:", "Retention Table:", sep="\n")
        print(cohort_data['retention_table'])

        # Heatmap first; shown only when the plot call returned something truthy
        heatmap = cohort_analyzer.plot_cohort_heatmap()
        if heatmap: heatmap.show()

        # Then the retention curves, with the same guard
        curves = cohort_analyzer.plot_retention_curves()
        if curves: curves.show()

## 7. Export Results

In [None]:
# Export step: hand the output directory to the analyzer and report whether
# it signalled success (skipped when there are no funnel results).
if analyzer.funnel_data is not None:
    success = analyzer.save_visualizations('../outputs')
    print("All results exported successfully!" if success else "Error exporting results")