<a href="https://colab.research.google.com/github/yaroslavzabavchuk/Python-Project/blob/main/Portfolio_project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so files stored there are reachable from the Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


import pandas as pd
import numpy as np
import scipy.stats as stats
from statsmodels.stats.proportion import proportions_ztest
from scipy import stats

# Input CSV on the mounted Drive (presumably a BigQuery export, judging by the file name)
file_path = '/content/drive/MyDrive/Mate_homework/bq-results-20250705-170039-1751735080622.csv'
df = pd.read_csv(file_path)

# Fail fast if the file lacks a column that the conversion calculation reads
required_columns = {'test', 'test_group', 'event_name', 'value'}
missing_columns = required_columns - set(df.columns)
assert not missing_columns, f"Missing expected columns: {sorted(missing_columns)}"

# Show a short, richly rendered preview instead of printing the whole frame as text
df.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
              date      country   device continent         channel  test  \
0       2020-11-02       Jordan  desktop      Asia  Organic Search     2   
1       2020-11-03        Qatar  desktop      Asia  Organic Search     2   
2       2020-11-03     Slovakia  desktop    Europe  Organic Search     2   
3       2020-11-03  New Zealand   mobile   Oceania          Direct     2   
4       2020-11-03    Venezuela   mobile  Americas     Paid Search     2   
...            ...          ...      ...       ...             ...   ...   
800991  2020-11-04      Vietnam   mobile      Asia          Direct     1   
800992  2020-11-07      Vietnam  desktop      Asia  Organic Search     2   
800993  2020-11-13      Vietnam   mobile      Asia  Organic Search     2   
800994  2020-12-15      Vietnam  desktop      Asia   Social Search     3   
800995  2020-11-26      Vietnam  de

In [None]:


# Event names whose per-session conversion is compared between the two groups
metrics = [
    'add_payment_info',
    'add_shipping_info',
    'begin_checkout',
    'new account',
]

# Placeholder for the results table that is populated further down
results = pd.DataFrame()

# Function for calculating conversion
def calculate_conversion(data, metric, test_num, group):
    """Return the event count, session count and conversion rate for one test group.

    Parameters
    ----------
    data : pandas.DataFrame
        Event table; the columns ``test``, ``test_group``, ``event_name`` and
        ``value`` are read.
    metric : str
        ``event_name`` whose ``value`` entries are summed as the numerator.
    test_num
        Value matched against the ``test`` column.
    group
        Value matched against the ``test_group`` column.

    Returns
    -------
    tuple
        ``(metric_value, session_value, conversion)`` where ``conversion`` is
        ``metric_value / session_value``, or 0 when no sessions were recorded.
    """
    in_group = (data['test'] == test_num) & (data['test_group'] == group)
    subset = data.loc[in_group]
    event_names = subset['event_name']

    # Summing an empty selection already yields 0, so no membership check is needed.
    metric_value = subset.loc[event_names == metric, 'value'].sum()

    # Report 0 sessions when none exist. Falling back to 1 would invent a
    # denominator and make the raw event count look like a conversion rate.
    session_value = subset.loc[event_names == 'session', 'value'].sum()

    conversion = metric_value / session_value if session_value > 0 else 0
    return metric_value, session_value, conversion

# Function for z-test
def run_z_test(metric_a, session_a, metric_b, session_b):
    """Two-sample z-test for the difference between two conversion proportions.

    Parameters
    ----------
    metric_a, metric_b
        Number of converting events in group A and group B.
    session_a, session_b
        Number of sessions (trials) in group A and group B.

    Returns
    -------
    tuple
        ``(z_stat, p_value)`` from ``proportions_ztest``. ``(0, 1)`` is returned
        when the test is undefined: a group has no sessions, or the pooled
        conversion rate is not strictly between 0 and 1 (the standard error
        would be zero or invalid and the statistic would come out as NaN).

    Notes
    -----
    Group A is passed first, so a negative z-statistic means group B has the
    higher conversion rate.
    """
    if session_a <= 0 or session_b <= 0:
        return 0, 1

    total_events = metric_a + metric_b
    total_sessions = session_a + session_b
    # A pooled proportion of 0 or 1 (or an impossible value above 1) has no usable variance.
    if total_events <= 0 or total_events >= total_sessions:
        return 0, 1

    z_stat, p_value = proportions_ztest([metric_a, metric_b], [session_a, session_b])
    return z_stat, p_value

# Significance threshold applied to every p-value
ALPHA = 0.05

# Collect one record per (test, metric) pair and build the frame once at the end;
# concatenating inside the loop re-copies the whole frame on every iteration.
result_rows = []

for test_num in range(1, 5):
    for metric in metrics:
        # test_group 1 is reported as A, test_group 2 as B
        metric_a, session_a, conv_a = calculate_conversion(df, metric, test_num, 1)
        metric_b, session_b, conv_b = calculate_conversion(df, metric, test_num, 2)

        # Relative change of B versus A in percent; reported as 0 when A has no conversions
        change_percent = ((conv_b / conv_a - 1) * 100) if conv_a > 0 else 0

        z_stat, p_value = run_z_test(metric_a, session_a, metric_b, session_b)

        result_rows.append({
            'Test': test_num,
            'Metric': f"{metric}/session",
            'Events_A': metric_a,
            'Sessions_A': session_a,
            'Conversion_A': round(conv_a, 4),
            'Events_B': metric_b,
            'Sessions_B': session_b,
            'Conversion_B': round(conv_b, 4),
            'Change (%)': round(change_percent, 2),
            'Z-Statistic': round(z_stat, 4),
            'P-Value': round(p_value, 4),
            # Significance is decided on the unrounded p-value
            'Significant': p_value < ALPHA
        })

results = pd.DataFrame(result_rows)

results


Unnamed: 0,Test,Metric,Events_A,Sessions_A,Conversion_A,Events_B,Sessions_B,Conversion_B,Change (%),Z-Statistic,P-Value,Significant
0,1,add_payment_info/session,1988,45362,0.0438,2229,45193,0.0493,12.54,-3.9249,0.0001,True
1,1,add_shipping_info/session,3034,45362,0.0669,3221,45193,0.0713,6.56,-2.6036,0.0092,True
2,1,begin_checkout/session,3784,45362,0.0834,4021,45193,0.089,6.66,-2.9788,0.0029,True
3,1,new account/session,3823,45362,0.0843,3681,45193,0.0815,-3.35,1.5429,0.1229,False
4,2,add_payment_info/session,2344,50637,0.0463,2409,50244,0.0479,3.58,-1.241,0.2146,False
5,2,add_shipping_info/session,3480,50637,0.0687,3510,50244,0.0699,1.65,-0.7096,0.478,False
6,2,begin_checkout/session,4262,50637,0.0842,4313,50244,0.0858,1.99,-0.9529,0.3406,False
7,2,new account/session,4165,50637,0.0823,4184,50244,0.0833,1.24,-0.5888,0.556,False
8,3,add_payment_info/session,3623,70047,0.0517,3697,70439,0.0525,1.47,-0.6432,0.5201,False
9,3,add_shipping_info/session,5298,70047,0.0756,5188,70439,0.0737,-2.62,1.4137,0.1574,False


# Test 1:
Strong positive lift across key conversion events:
* Add payment info/session: +12.54%, significant
* Add shipping info/session: +6.56%, significant
* Begin checkout/session: +6.66%, significant
* New account/session: -3.35%, not significant

**Conclusion**: Clear conversion improvements for payment/shipping/checkout. The change in new-account conversion is not statistically significant.

# Test 2:
Small, non-significant improvements:
* Conversion increases of 1–4% across all events.
* All P-values > 0.2, meaning no significant effect.

**Conclusion**: Group B had slightly better results, but the improvement is not statistically significant.

# Test 3:
Mixed results:

* Begin checkout/session: -3.35% (slightly worse) and statistically significant
* Other metrics show small or no significant differences.

**Conclusion**: Slight decrease in checkout conversion is statistically significant. Others are not.

# Test 4:
Mostly minor negative changes:

* Begin checkout/session: -2.35%, significant
* New account/session: -3.36%, significant
* Add payment/shipping info: small decreases, not significant

**Conclusion**: Slight but statistically significant drop in key conversions.

In [None]:
# Write the results table to Drive as CSV, leaving out the index column
output_path = '/content/drive/MyDrive/Mate_homework/test_results.csv'
results.to_csv(output_path, index=False)


# [Tableau](https://public.tableau.com/views/Portfolioproject1_17539770708620/Dashboard1?:language=en-US&publish=yes&:sid=&:redirect=auth&:display_count=n&:origin=viz_share_link)

