In [4]:
# Install the library first if it is not already available: pip install abc_exp_toolkit

In [5]:
import abc_exp_toolkit as tk 

In [6]:
# import src.abc_exp_toolkit as tk
#import FunnelBuilder, compute_step_stats, generate_synthetic_funnel

import pandas as pd

# 1. A simple example with a small dataset 

In [7]:
# Toy event log: one row per (user, event), written as
# (user_id, event, ts, group) so each event reads left to right.
# User '2' enters the funnel through 'view1' instead of 'view'.
df = pd.DataFrame(
    [
        ('1', 'view',   '2024-01-01', 'A'),
        ('1', 'signup', '2024-01-02', 'A'),
        ('1', 'buy',    '2024-01-03', 'A'),
        ('2', 'view1',  '2024-01-01', 'A'),
        ('2', 'signup', '2024-01-02', 'A'),
        ('3', 'view',   '2024-01-01', 'A'),
        ('4', 'view',   '2024-01-01', 'B'),
        ('4', 'signup', '2024-01-03', 'B'),
        ('5', 'view',   '2024-01-01', 'B'),
        ('5', 'signup', '2024-01-04', 'B'),
        ('6', 'view',   '2024-01-01', 'B'),
        ('6', 'signup', '2024-01-05', 'B'),
    ],
    columns=['user_id', 'event', 'ts', 'group'],
).assign(ts=lambda frame: pd.to_datetime(frame['ts']))  # parse the date strings into timestamps

In [8]:
# Display the toy event log (columns: user_id, event, ts, group).
df

Unnamed: 0,user_id,event,ts,group
0,1,view,2024-01-01,A
1,1,signup,2024-01-02,A
2,1,buy,2024-01-03,A
3,2,view1,2024-01-01,A
4,2,signup,2024-01-02,A
5,3,view,2024-01-01,A
6,4,view,2024-01-01,B
7,4,signup,2024-01-03,B
8,5,view,2024-01-01,B
9,5,signup,2024-01-04,B


In [9]:
# Point the builder at the event log and tell it which columns hold
# the user id, the event name, the experiment group and the timestamp.
f = tk.FunnelBuilder(
    df,
    field_id='user_id',
    field_event='event',
    field_groupby='group',
    field_time='ts',
)

# Three funnel steps; the first one is given two event names
# ('view' and 'view1'), which appear as a single "view, view1" row in the result.
funnel_df = f.step('view', 'view1').step('signup').step('buy').build(absolute=True, relative=True)
funnel_df

group_name,event,A,B,A_rel_%,B_rel_%,A_abs_%,B_abs_%
0,"view, view1",3,3,100.0,100.0,100.0,100.0
1,signup,2,3,66.67,100.0,66.67,100.0
2,buy,1,0,50.0,0.0,33.33,0.0


In [10]:
# Compare groups A and B step by step with a two-sided test,
# reporting both the relative and the absolute conversion columns.
res = tk.compute_step_stats(
    funnel_df,
    groups=['A', 'B'],
    relative=True,
    absolute=True,
    hypothesis='two-sided',
)
res

Unnamed: 0,event,A,B,diff_abs,A_rel_%,B_rel_%,diff_rel_pp,z_rel,p_value_rel,sig_rel,A_abs_%,B_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view, view1",3.0,3.0,0.0,100.0,100.0,0.0,,,False,100.0,100.0,0.0,,,False
1,signup,2.0,3.0,1.0,66.6667,100.0,33.3333,1.095445,0.273322,False,66.6667,100.0,33.3333,1.095445,0.273322,False
2,buy,1.0,0.0,-1.0,50.0,0.0,-50.0,,,False,33.3333,0.0,-33.3333,,,False


In [11]:
# Same toy event log as before, but without a timestamp column:
# each tuple is (user_id, event, group).
df = pd.DataFrame(
    [
        ('1', 'view',   'A'),
        ('1', 'signup', 'A'),
        ('1', 'buy',    'A'),
        ('2', 'view1',  'A'),
        ('2', 'signup', 'A'),
        ('3', 'view',   'A'),
        ('4', 'view',   'B'),
        ('4', 'signup', 'B'),
        ('5', 'view',   'B'),
        ('5', 'signup', 'B'),
        ('6', 'view',   'B'),
        ('6', 'signup', 'B'),
    ],
    columns=['user_id', 'event', 'group'],
)

# field_time is optional: here the builder is created without a timestamp column.
f = tk.FunnelBuilder(
    df,
    field_id='user_id',
    field_event='event',
    field_groupby='group',
)

# Same three steps as in the timestamped example.
funnel_df = f.step('view', 'view1').step('signup').step('buy').build(absolute=True, relative=True)
funnel_df

group_name,event,A,B,A_rel_%,B_rel_%,A_abs_%,B_abs_%
0,"view, view1",3,3,100.0,100.0,100.0,100.0
1,signup,2,3,66.67,100.0,66.67,100.0
2,buy,1,0,50.0,0.0,33.33,0.0


# 2. Use a library function to generate synthetic data

In [12]:
# Generate a synthetic event log for three experiment groups.
# `steps` lists the event names of each funnel step (the first step has two
# alternative events); `conv` holds one rate per step for every group.
# NOTE(review): exact semantics of `conv` and `max_days_between_steps` are
# defined by the library — check its documentation before tuning them.
df = tk.generate_synthetic_funnel(
    n_per_group=100_000,
    groups=("A", "B", "C"),
    steps=(["view_page1", "view_page2"], ["signup"], ["buy"]),
    conv=dict(
        A=(1.00, 0.35, 0.12),
        B=(1.00, 0.38, 0.14),
        C=(1.00, 0.33, 0.17),
    ),
    start_date="2024-01-01",
    max_days_between_steps=(1, 3, 7),
    seed=123,  # fixed seed so the generated data is reproducible
)

In [13]:
# Preview the generated event log; the frame is large, so the notebook renders only a truncated view.
df

Unnamed: 0,user_id,event,ts,group
0,A_000001,view_page2,2024-01-01,A
1,A_000002,view_page1,2024-01-01,A
2,A_000003,view_page2,2024-01-01,A
3,A_000004,view_page1,2024-01-01,A
4,A_000005,view_page2,2024-01-01,A
...,...,...,...,...
421083,C_099996,view_page1,2024-01-01,C
421084,C_099997,view_page1,2024-01-01,C
421085,C_099998,view_page2,2024-01-01,C
421086,C_099999,view_page2,2024-01-01,C


You can use `FunnelBuilder` with two or more groups.

In [14]:
# Builder over the synthetic event log, which contains three groups (A, B, C).
f = tk.FunnelBuilder(
    df,
    field_id='user_id',
    field_event='event',
    field_groupby='group',
    field_time='ts',
)

# The first step is given both page-view events; then signup and buy.
funnel_df = (
    f.step('view_page1', 'view_page2')
    .step('signup')
    .step('buy')
    .build(absolute=True, relative=True)
)
funnel_df

group_name,event,A,B,C,A_rel_%,B_rel_%,C_rel_%,A_abs_%,B_abs_%,C_abs_%
0,"view_page1, view_page2",100000,100000,100000,100.0,100.0,100.0,100.0,100.0,100.0
1,signup,34873,37940,33073,34.87,37.94,33.07,34.87,37.94,33.07
2,buy,3604,4601,4839,10.33,12.13,14.63,3.6,4.6,4.84


Then compare the groups pairwise with a z-test, using a Bonferroni adjustment for multiple comparisons.

In [15]:
# A vs B: two-sided test on absolute conversion,
# Bonferroni-corrected with m_tests=3.
tk.compute_step_stats(
    funnel_df,
    groups=['A', 'B'],
    absolute=True,
    relative=False,
    hypothesis='two-sided',
    correction='bonferroni',
    m_tests=3,
)


Unnamed: 0,event,A,B,diff_abs,A_abs_%,B_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view_page1, view_page2",100000.0,100000.0,0.0,100.0,100.0,0.0,,,False
1,signup,34873.0,37940.0,3067.0,34.873,37.94,3.067,14.252891,4.301034e-46,True
2,buy,3604.0,4601.0,997.0,3.604,4.601,0.997,11.239635,2.604559e-29,True


In [16]:
# B vs C: two-sided test on absolute conversion,
# Bonferroni-corrected with m_tests=3.
tk.compute_step_stats(
    funnel_df,
    groups=['B', 'C'],
    absolute=True,
    relative=False,
    hypothesis='two-sided',
    correction='bonferroni',
    m_tests=3,
)

Unnamed: 0,event,B,C,diff_abs,B_abs_%,C_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view_page1, view_page2",100000.0,100000.0,0.0,100.0,100.0,0.0,,,False
1,signup,37940.0,33073.0,-4867.0,37.94,33.073,-4.867,-22.742303,1.710154e-114,True
2,buy,4601.0,4839.0,238.0,4.601,4.839,0.238,2.509517,0.01208965,True


In [17]:
# A vs C: two-sided test on absolute conversion,
# Bonferroni-corrected with m_tests=3.
tk.compute_step_stats(
    funnel_df,
    groups=['A', 'C'],
    absolute=True,
    relative=False,
    hypothesis='two-sided',
    correction='bonferroni',
    m_tests=3,
)

Unnamed: 0,event,A,C,diff_abs,A_abs_%,C_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view_page1, view_page2",100000.0,100000.0,0.0,100.0,100.0,0.0,,,False
1,signup,34873.0,33073.0,-1800.0,34.873,33.073,-1.8,-8.498255,1.924614e-17,True
2,buy,3604.0,4839.0,1235.0,3.604,4.839,1.235,13.733605,6.387465e-43,True


# 3. One-sided hypothesis and non-inferiority tests 

A classic one-sided hypothesis can be formulated as:

- H0: x2-x1<=0
- H1: x2-x1>0

However, sometimes a non-inferiority test is more appropriate — for example, when the company has already decided to ship a redesign and is willing to accept a small drop in the metric for the sake of future plans. In this case, we allow the metric to decrease, but by no more than a given threshold.

- H0: x2-x1<=-threshold 
- H1: x2-x1>-threshold 

Here, the negative threshold represents the maximum acceptable decline in the metric. If the observed difference is above this limit, the redesign is considered non-inferior to the current version.

In [20]:
# One-sided test of B against A on absolute conversion, Bonferroni-corrected
# with m_tests=3, with the null hypothesis shifted by delta0.
# NOTE(review): the z-scores shown for this call are smaller than in the
# two-sided A-vs-B run (13.79 vs 14.25 for signup, 10.11 vs 11.24 for buy),
# which is consistent with z = (diff - delta0) / se, i.e. H0: B - A <= +0.001
# (delta0 in proportion units, 0.001 = 0.1 pp). For the non-inferiority setup
# described in the text (H0: B - A <= -threshold) a negative delta0 would
# presumably be required — confirm the sign convention in the library docs.
tk.compute_step_stats(funnel_df, groups=['A', 'B'], 
                         absolute = True,
                         relative=False,
                         hypothesis='one-sided',
                         correction='bonferroni',
                         m_tests = 3,
                         delta0=0.001)


Unnamed: 0,event,A,B,diff_abs,A_abs_%,B_abs_%,diff_abs_pp,z_abs,p_value_abs,sig_abs
0,"view_page1, view_page2",100000.0,100000.0,0.0,100.0,100.0,0.0,,,False
1,signup,34873.0,37940.0,3067.0,34.873,37.94,3.067,13.788173,1.501345e-43,True
2,buy,3604.0,4601.0,997.0,3.604,4.601,0.997,10.11229,2.436588e-24,True
