In [1]:
import src.abc_exp_toolkit as tk
# Names used from the toolkit below: tk.FunnelBuilder, tk.compute_step_stats, tk.generate_synthetic_funnel

import pandas as pd

# 1. A simple example with a small dataset 

In [2]:
# Toy event log: one row per (user, event). Users 1-3 are in group A, users 4-6 in group B.
# Values are laid out per user so that events and timestamps line up visually.
df = pd.DataFrame({
    'user_id': list('111223445566'),
    'event': [
        'view', 'signup', 'buy',   # user 1
        'view1', 'signup',         # user 2
        'view',                    # user 3
        'view', 'signup',          # user 4
        'view', 'signup',          # user 5
        'view', 'signup',          # user 6
    ],
    'ts': pd.to_datetime([
        '2024-01-01', '2024-01-02', '2024-01-03',  # user 1
        '2024-01-01', '2024-01-02',                # user 2
        '2024-01-01',                              # user 3
        '2024-01-01', '2024-01-03',                # user 4
        '2024-01-01', '2024-01-04',                # user 5
        '2024-01-01', '2024-01-05',                # user 6
    ]),
    'group': ['A'] * 6 + ['B'] * 6,
})

In [3]:
# Show the event log defined above (one row per user event).
df

Unnamed: 0,user_id,event,ts,group
0,1,view,2024-01-01,A
1,1,signup,2024-01-02,A
2,1,buy,2024-01-03,A
3,2,view1,2024-01-01,A
4,2,signup,2024-01-02,A
5,3,view,2024-01-01,A
6,4,view,2024-01-01,B
7,4,signup,2024-01-03,B
8,5,view,2024-01-01,B
9,5,signup,2024-01-04,B


In [4]:
# Build a three-step funnel per experiment group.
# The first step accepts either 'view' or 'view1'; it shows up as the "view, view1" row.
# In the saved output, *_rel_% matches conversion from the previous step and
# *_abs_% matches conversion from the first step.
f = tk.FunnelBuilder(
    df,
    field_id='user_id',
    field_event='event',
    field_groupby='group',
    field_time='ts',
)
funnel_df = (
    f.step('view', 'view1')
    .step('signup')
    .step('buy')
    .build(absolute=True, relative=True)
)
funnel_df

group_name,event,A,B,A_rel_%,B_rel_%,A_abs_%,B_abs_%
0,"view, view1",3,3,100.0,100.0,100.0,100.0
1,signup,2,3,66.67,100.0,66.67,100.0
2,buy,1,0,50.0,0.0,33.33,0.0


In [5]:
# Compare groups A and B at every funnel step.
# With relative=True and absolute=True the result carries both the *_rel and
# the *_abs statistic columns (z, p_value, sig).
res = tk.compute_step_stats(
    funnel_df,
    groups=['A', 'B'],
    relative=True,
    absolute=True,
    hypothesis='two-sided',
)
res

Unnamed: 0,event,A,B,diff_abs,A_rel_%,B_rel_%,diff_rel_pp,z_rel,p_value_rel,sig_rel,A_abs_%,B_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view, view1",3.0,3.0,0.0,100.0,100.0,0.0,,,False,100.0,100.0,0.0,,,False
1,signup,2.0,3.0,1.0,66.6667,100.0,33.3333,1.095445,0.273322,False,66.6667,100.0,33.3333,1.095445,0.273322,False
2,buy,1.0,0.0,-1.0,50.0,0.0,-50.0,-1.369306,0.170904,False,33.3333,0.0,-33.3333,-1.095445,0.273322,False


In [6]:
# Same users, events and groups as the first example, but without a timestamp column.
df = pd.DataFrame({
    'user_id': list('111223445566'),
    'event': [
        'view', 'signup', 'buy',   # user 1
        'view1', 'signup',         # user 2
        'view',                    # user 3
        'view', 'signup',          # user 4
        'view', 'signup',          # user 5
        'view', 'signup',          # user 6
    ],
    'group': ['A'] * 6 + ['B'] * 6,
})

# field_time is an optional argument; it is left out here because the frame has no 'ts' column.
f = tk.FunnelBuilder(
    df,
    field_id='user_id',
    field_event='event',
    field_groupby='group',
)
funnel_df = (
    f.step('view', 'view1')
    .step('signup')
    .step('buy')
    .build(absolute=True, relative=True)
)
funnel_df

group_name,event,A,B,A_rel_%,B_rel_%,A_abs_%,B_abs_%
0,"view, view1",3,3,100.0,100.0,100.0,100.0
1,signup,2,3,66.67,100.0,66.67,100.0
2,buy,1,0,50.0,0.0,33.33,0.0


# 2. Use a function from the library

In [7]:
# Generate a synthetic event log for three groups with a fixed seed.
# Each `conv` tuple has one value per entry in `steps`
# (presumably per-step conversion rates; confirm against the toolkit docs).
df = tk.generate_synthetic_funnel(
    n_per_group=100_000,
    groups=('A', 'B', 'C'),
    steps=(['view_page1', 'view_page2'], ['signup'], ['buy']),
    conv={
        'A': (1.00, 0.35, 0.12),
        'B': (1.00, 0.38, 0.14),
        'C': (1.00, 0.33, 0.17),
    },
    start_date='2024-01-01',
    max_days_between_steps=(1, 3, 7),
    seed=123,
)

In [8]:
# Inspect the synthetic event log; for a frame this large pandas renders only the first and last rows.
df

Unnamed: 0,user_id,event,ts,group
0,A_000001,view_page2,2024-01-01,A
1,A_000002,view_page1,2024-01-01,A
2,A_000003,view_page2,2024-01-01,A
3,A_000004,view_page1,2024-01-01,A
4,A_000005,view_page2,2024-01-01,A
...,...,...,...,...
421083,C_099996,view_page1,2024-01-01,C
421084,C_099997,view_page1,2024-01-01,C
421085,C_099998,view_page2,2024-01-01,C
421086,C_099999,view_page2,2024-01-01,C


You can use FunnelBuilder with two or more groups.

In [9]:
# Three-group funnel over the synthetic data; either page-view event counts for the first step.
# NOTE(review): group A is generated with conv 0.35 for the signup step, yet the
# saved funnel output reports 23.06% for A. Confirm whether field_time ordering
# (or the meaning of `conv`) explains the gap.
f = tk.FunnelBuilder(
    df,
    field_id='user_id',
    field_event='event',
    field_groupby='group',
    field_time='ts',
)
funnel_df = (
    f.step('view_page1', 'view_page2')
    .step('signup')
    .step('buy')
    .build(absolute=True, relative=True)
)
funnel_df

group_name,event,A,B,C,A_rel_%,B_rel_%,C_rel_%,A_abs_%,B_abs_%,C_abs_%
0,"view_page1, view_page2",100000,100000,100000,100.0,100.0,100.0,100.0,100.0,100.0
1,signup,23058,25282,21922,23.06,25.28,21.92,23.06,25.28,21.92
2,buy,1819,2304,2470,7.89,9.11,11.27,1.82,2.3,2.47


Then run a z-test for each pair of groups (A–B, B–C, A–C), using a Bonferroni adjustment for the three comparisons (`m_tests=3`).

In [10]:
# A vs B on absolute conversion only (relative=False, so no *_rel columns in the result).
# m_tests=3 is the same in all three pairwise cells (A-B, B-C, A-C).
tk.compute_step_stats(
    funnel_df,
    groups=['A', 'B'],
    absolute=True,
    relative=False,
    hypothesis='two-sided',
    correction='bonferroni',
    m_tests=3,
)


Unnamed: 0,event,A,B,diff_abs,A_abs_%,B_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view_page1, view_page2",100000.0,100000.0,0.0,100.0,100.0,0.0,,,False
1,signup,23058.0,25282.0,2224.0,23.058,25.282,2.224,11.616116,3.413034e-31,True
2,buy,1819.0,2304.0,485.0,1.819,2.304,0.485,7.632351,2.305104e-14,True


In [11]:
# B vs C on absolute conversion only (relative=False, so no *_rel columns in the result).
# m_tests=3 is the same in all three pairwise cells (A-B, B-C, A-C).
tk.compute_step_stats(
    funnel_df,
    groups=['B', 'C'],
    absolute=True,
    relative=False,
    hypothesis='two-sided',
    correction='bonferroni',
    m_tests=3,
)

Unnamed: 0,event,B,C,diff_abs,B_abs_%,C_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view_page1, view_page2",100000.0,100000.0,0.0,100.0,100.0,0.0,,,False
1,signup,25282.0,21922.0,-3360.0,25.282,21.922,-3.36,-17.693302,4.722546e-70,True
2,buy,2304.0,2470.0,166.0,2.304,2.47,0.166,2.431717,0.01502744,True


In [12]:
# A vs C on absolute conversion only (relative=False, so no *_rel columns in the result).
# m_tests=3 is the same in all three pairwise cells (A-B, B-C, A-C).
tk.compute_step_stats(
    funnel_df,
    groups=['A', 'C'],
    absolute=True,
    relative=False,
    hypothesis='two-sided',
    correction='bonferroni',
    m_tests=3,
)

Unnamed: 0,event,A,C,diff_abs,A_abs_%,C_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view_page1, view_page2",100000.0,100000.0,0.0,100.0,100.0,0.0,,,False
1,signup,23058.0,21922.0,-1136.0,23.058,21.922,-1.136,-6.084007,1.172152e-09,True
2,buy,1819.0,2470.0,651.0,1.819,2.47,0.651,10.048711,9.307713e-24,True
