# Final Project
###### Karina Ruban

##### **README** important to filter

##### Added "flag"-values in data:

|df|col|(value)|
|------|------|------|
|Calls |Contact_ID    |0000000000000000000    |
|Calls |Call_Duration_Sec     |0     |
|Deals |Initial_Amount_Paid     |1000000    |
|Deals |Offer_Total_Amount     |1000000     |

## Task 1. Calculate unit-economy by products (metric calculation).

| ***Metric***| Description |
|------|------|
|**UA** | Users: number of leads (scaling units)|
| **B** | Buyers |
|**C1** | Conversion Rate (B/UA)|
| **AC** | Marketing Budget |
| **CPA** | Cost Per Acquisition (AC/UA) |
| **CAC** | Customer Acquisition Cost (AC/B) |
| **Gross Revenue** | Total Sales|
| **AOV** | Average Order Value (Gross Revenue/B)|
| **APC** | Average Purchases per Customer|
| **LTV** | Lifetime Value (Gross Revenue / UA)|
| **CM** | Contribution Margin (CLTV – CAC) × B|
| **T** | Total Deals|

In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
with open('cleaned_project_data.pickle', 'rb') as f:
    calls, contacts, deals, spend = pickle.load(f)

In [3]:
#total UE

metrics_total_UE = {}

metrics_total_UE['UA (Users)'] = deals['Contact_Name'].nunique()

successful_deals_UE = deals[(deals['Stage'] == 'Payment Done') & (deals['Closing_Date'].notna())]

metrics_total_UE['B (Buyers)'] = successful_deals_UE['Contact_Name'].nunique()

metrics_total_UE['T (Total Deals)'] = successful_deals_UE['Deals_ID'].count() #t

metrics_total_UE['Gross Revenue'] = successful_deals_UE.loc[successful_deals_UE['Offer_Total_Amount'] != 1000000, 'Offer_Total_Amount'].sum()

metrics_total_UE['AC (Marketing Budget)'] = spend['Spend'].sum()

metrics_total_UE['C1 (Conversion Rate), %'] = metrics_total_UE['B (Buyers)'] / metrics_total_UE['UA (Users)'] * 100

metrics_total_UE['CPA (Cost Per Acquisition)'] = metrics_total_UE['AC (Marketing Budget)'] / metrics_total_UE['UA (Users)']

metrics_total_UE['CAC (Customer Acquisition Cost)'] = metrics_total_UE['AC (Marketing Budget)'] / metrics_total_UE['B (Buyers)']

metrics_total_UE['AOV (Average Order Value)'] = metrics_total_UE['Gross Revenue'] / metrics_total_UE['B (Bayers)']

metrics_total_UE['APC (Average Purchases per Customer)'] = metrics_total_UE['T (Total Deals)'] / metrics_total_UE['B (Bayers)']

metrics_total_UE['LTV (Customer Lifetime Value)'] = metrics_total_UE['Gross Revenue'] / metrics_total_UE['UA (Users)']

metrics_total_UE['CM (Contribution Margin)'] = (metrics_total_UE['LTV (Customer Lifetime Value)'] - metrics_total_UE['CPA (Cost Per Acquisition)']) * metrics_total_UE['UA (Users)']

metrics_series = pd.Series(metrics_total_UE)
metrics_df = metrics_series.to_frame(name='Value')
metrics_df.index.name = 'Metric'
styled_df = metrics_df.style.format('{:,.1f}')
styled_df

Unnamed: 0_level_0,Value
Metric,Unnamed: 1_level_1
UA (Users),7834.0
B (Buyers),496.0
T (Total Deals),512.0
Gross Revenue,3050401.0
AC (Marketing Budget),149523.5
"C1 (Conversion Rate), %",6.3
CPA (Cost Per Acquisition),19.1
CAC (Customer Acquisition Cost),301.5
AOV (Average Order Value),6150.0
APC (Average Purchases per Customer),1.0


In [11]:
#total UE

metrics_total_UE = {}

metrics_total_UE['UA (Users)'] = deals['Contact_Name'].nunique()

successful_deals_UE = deals[(deals['Stage'] == 'Payment Done') & (deals['Closing_Date'].notna())]

metrics_total_UE['B (Buyers)'] = successful_deals_UE['Contact_Name'].nunique()

metrics_total_UE['T (Total Deals)'] = successful_deals_UE['Deals_ID'].count() #t

metrics_total_UE['Gross Revenue'] = successful_deals_UE.loc[successful_deals_UE['Offer_Total_Amount'] != 1000000, 'Offer_Total_Amount'].sum()

metrics_total_UE['AC (Marketing Budget)'] = spend['Spend'].sum()

metrics_total_UE['C1 (Conversion Rate), %'] = metrics_total_UE['B (Buyers)'] / metrics_total_UE['UA (Users)'] * 100

metrics_total_UE['CAC (Customer Acquisition Cost)'] = metrics_total_UE['AC (Marketing Budget)'] / metrics_total_UE['B (Buyers)']

metrics_total_UE['AOV (Average Order Value)'] = metrics_total_UE['Gross Revenue'] / metrics_total_UE['B (Buyers)']

metrics_series = pd.Series(metrics_total_UE)
metrics_df = metrics_series.to_frame(name='Value')
metrics_df.index.name = 'Metric'
styled_df = metrics_df.style.format('{:,.1f}')
styled_df

Unnamed: 0_level_0,Value
Metric,Unnamed: 1_level_1
UA (Users),7834.0
B (Buyers),496.0
T (Total Deals),512.0
Gross Revenue,3050401.0
AC (Marketing Budget),149523.5
"C1 (Conversion Rate), %",6.3
CAC (Customer Acquisition Cost),301.5
AOV (Average Order Value),6150.0


In [4]:
#products UE

# marketing budget cannot be divide to products or even education type

deals_filtered_ue = deals[(deals['Product'] != 'Unknown') & (deals['Education_Type'] != 'Unknown')].copy()
deals_filtered_ue['Product'] = deals_filtered_ue['Product'].astype('category').cat.remove_unused_categories()
deals_filtered_ue['Education_Type'] = deals_filtered_ue['Education_Type'].astype('category').cat.remove_unused_categories()

metrics_product_ue = deals_filtered_ue.groupby(['Product', 'Education_Type'],observed=True).apply(lambda group: pd.Series({'UA (Users)': group['Contact_Name'].nunique(),
                                                                                                                            'B (Buyers)': group[(group['Stage'] == 'Payment Done') & (group['Closing_Date'].notna())]['Contact_Name'].nunique(),
                                                                                                                            'T (Total Deals)': group[(group['Stage'] == 'Payment Done') & (group['Closing_Date'].notna())]['Deals_ID'].count(),
                                                                                                                            'Gross_Revenue': group[(group['Stage'] == 'Payment Done') & (group['Closing_Date'].notna()) & (group['Offer_Total_Amount'] < 1_000_000)]['Offer_Total_Amount'].sum()}))
metrics_total_products = {}

metrics_product_ue['B (Buyers)'] = metrics_product_ue['B (Buyers)'].fillna(0).astype(int)
metrics_product_ue['C1 (Conversion Rate), %'] = (metrics_product_ue['B (Buyers)'] / metrics_product_ue['UA (Users)']) * 100
metrics_product_ue['AOV (Average Order Value)'] = metrics_product_ue['Gross_Revenue'] / metrics_product_ue['B (Buyers)']
metrics_product_ue['APC (Average Purchases per Customer)'] = metrics_product_ue['T (Total Deals)'] / metrics_product_ue['B (Buyers)']
metrics_product_ue['LTV (Customer Lifetime Value)'] = metrics_product_ue['Gross_Revenue'] / metrics_product_ue['UA (Users)']

styled_df_product = metrics_product_ue.style.format('{:,.1f}')
styled_df_product

Unnamed: 0_level_0,Unnamed: 1_level_0,UA (Users),B (Buyers),T (Total Deals),Gross_Revenue,"C1 (Conversion Rate), %",AOV (Average Order Value),APC (Average Purchases per Customer),LTV (Customer Lifetime Value)
Product,Education_Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Digital Marketing,Evening,240.0,112.0,112.0,404800.0,46.7,3614.3,1.0,1686.7
Digital Marketing,Morning,1498.0,169.0,173.0,1465100.0,11.3,8669.2,1.0,978.0
UX/UI Design,Evening,150.0,57.0,57.0,213000.0,38.0,3736.8,1.0,1420.0
UX/UI Design,Morning,805.0,89.0,89.0,713400.0,11.1,8015.7,1.0,886.2
Web Developer,Evening,1.0,0.0,0.0,0.0,0.0,,,0.0
Web Developer,Morning,532.0,78.0,80.0,254100.0,14.7,3257.7,1.0,477.6


In [5]:
#total UE

deals['cohort_month'] = deals['Created_Time'].dt.to_period('M')

metrics_cohorts_ue = deals.groupby('cohort_month').apply(lambda group: pd.Series({'UA (Users)': group['Contact_Name'].nunique(),
                                                                                  'B (Buyers)': group[(group['Stage'] == 'Payment Done') & (group['Closing_Date'].notna())]['Contact_Name'].nunique(),
                                                                                  'T (Total Deals)': group[(group['Stage'] == 'Payment Done') & (group['Closing_Date'].notna())]['Deals_ID'].count(),
                                                                                  'Gross_Revenue': group[(group['Stage'] == 'Payment Done') & (group['Closing_Date'].notna()) & (group['Offer_Total_Amount'] < 1_000_000)]['Offer_Total_Amount'].sum()}))

metrics_cohorts_ue['AC (Marketing Budget)'] = spend.groupby(spend['Date'].dt.to_period('M'))['Spend'].sum().to_frame(name='AC (Marketing Budget)')

metrics_cohorts_ue['C1 (Conversion Rate), %'] = (metrics_cohorts_ue['B (Buyers)'] / metrics_cohorts_ue['UA (Users)']) * 100
metrics_cohorts_ue['CPA (Cost Per Acquisition)'] = metrics_cohorts_ue['AC (Marketing Budget)'] / metrics_cohorts_ue['UA (Users)']
metrics_cohorts_ue['CAC (Customer Acquisition Cost)'] = metrics_cohorts_ue['AC (Marketing Budget)'] / metrics_cohorts_ue['B (Buyers)']
metrics_cohorts_ue['AOV (Average Order Value)'] = metrics_cohorts_ue['Gross_Revenue'] / metrics_cohorts_ue['B (Buyers)']
metrics_cohorts_ue['APC (Average Purchases per Customer)'] = metrics_cohorts_ue['T (Total Deals)'] / metrics_cohorts_ue['B (Buyers)']
metrics_cohorts_ue['LTV (Customer Lifetime Value)'] = metrics_cohorts_ue['Gross_Revenue'] / metrics_cohorts_ue['UA (Users)']
metrics_cohorts_ue['CM (Contribution Margin)'] = (metrics_cohorts_ue['LTV (Customer Lifetime Value)'] - metrics_cohorts_ue['CPA (Cost Per Acquisition)']) * metrics_cohorts_ue['UA (Users)']

styled_df_cohort = metrics_cohorts_ue.style.format('{:,.1f}')
styled_df_cohort

Unnamed: 0_level_0,UA (Users),B (Buyers),T (Total Deals),Gross_Revenue,AC (Marketing Budget),"C1 (Conversion Rate), %",CPA (Cost Per Acquisition),CAC (Customer Acquisition Cost),AOV (Average Order Value),APC (Average Purchases per Customer),LTV (Customer Lifetime Value),CM (Contribution Margin)
cohort_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-07,287.0,7.0,7.0,42500.0,6062.3,2.4,21.1,866.0,6071.4,1.0,148.1,36437.7
2023-08,440.0,44.0,45.0,261500.0,9033.7,10.0,20.5,205.3,5943.2,1.0,594.3,252466.3
2023-09,484.0,33.0,34.0,235000.0,8711.2,6.8,18.0,264.0,7121.2,1.0,485.5,226288.8
2023-10,732.0,46.0,47.0,308700.0,11807.8,6.3,16.1,256.7,6710.9,1.0,421.7,296892.2
2023-11,821.0,56.0,58.0,400700.0,10890.7,6.8,13.3,194.5,7155.4,1.0,488.1,389809.3
2023-12,772.0,59.0,59.0,399500.0,12186.0,7.6,15.8,206.5,6771.2,1.0,517.5,387314.0
2024-01,1008.0,65.0,66.0,350500.0,12583.6,6.4,12.5,193.6,5392.3,1.0,347.7,337916.4
2024-02,1032.0,42.0,43.0,211500.0,12476.8,4.1,12.1,297.1,5035.7,1.0,204.9,199023.2
2024-03,974.0,61.0,63.0,323400.0,16223.4,6.3,16.7,266.0,5301.6,1.0,332.0,307176.6
2024-04,1490.0,39.0,41.0,247401.0,20964.7,2.6,14.1,537.6,6343.6,1.1,166.0,226436.4


#### Summary:

##### For business, the main metric is profit, which is primarily influenced by total gross revenue, contribution margin and marketing budget. Revenues depend on conversion & average order value, and costs on client attraction & customer acquisition cost.

##### Contribution Margin
- ###### high profit relative to marketing costs
- ###### data may not be full for the last period
- ###### February and May 2024 show a decrease of CM

##### Marketing Budget
- ###### Marketing costs increased until April 2024, and then began to decrease. **It is worth analyzing at what cost.**

##### Conversion Rate,%
- ###### There is a low conversion rate throughout the company.
- ###### The highest conversion rate was observed for August 2023, and then it gradually decreased
- ###### among the morning programs maximum conversion at Digital Marketing, among the evening - UX/UI Design

##### Customer Acquisition Cost
- ###### There is a high customer acquisition cost throughout the company.
- ###### on the basis of cohort analysis, CAC decreased by the end of 2023 and started to increase again in early 2024

##### Average Order Value
- ###### the average check is quite low due to the availability of free courses and gifts, less than 50% of the maximum price of the product
- ###### middle check is clearly divided by types of training, as evening programs are cheaper than morning
- ###### by cohortes the average check is approximately the same for all time of observation

##### The marketing budget, contribution margin and gross revenue are formed by the smaller metrics that are the growth points for business. Average Purchases per Customer also may be a key point but for this school have to increase the number of products by adding repeat programs, minor courses in narrow areas or deeper courses for specialists.

## Task 2. From the unit-economy to determine the points of business growth.

#### Based on business goals, conversion rate, customer acquisition cost & average order value are important metrics for optimization. 

##### Conversion Rate,%
- ###### The high conversion rate for evening courses in Digital Marketing (46.7%) and UX/UI Design (38.0%) indicates the potential to scale these courses.
- ###### The low conversion rate for morning courses indicates a need to improve marketing campaigns and customer retention for these courses.

##### Customer Acquisition Cost
- ###### According to the previous stages of analysis, not all companies and sources are efficient and profitable. Their reorganization can reduce customer acquisition cost. 
- ###### there is always a way to reduce the cost of attracting clients

##### Average Order Value
- ###### Reporting the value of the product with reducing the proportion of free courses will increase the average check without increasing the price for courses
- ###### Higher conversion rate in the morning will bring an increase in average order value

##### Summary:
###### Possible actions to improve the performance of the economy:
- ###### Optimization of marketing campaigns and sources of customer acquisition with support of short AB tests
- ###### Analysis and optimization of the interaction process with the company
- ###### Evaluation of Product Line Expansion and Co-Finance Opportunities for Customer Retention

## Task 3. Understand the metric tree for business

#### Profit is a key metric of business. According to the formulas for calculating various metrics, the tree shows dependence on each other. 
#### In our case, the decision metrics include conversion, average order value, average purchases per customer, CLTV & LTV.
#### In this business would also be important to see NPS. Public NPS can also increase conversion organically.


In [6]:
from treelib import Node, Tree

tree = Tree()

red = '\033[31m'
green = '\033[32m'
blue = '\033[34m'
reset = '\033[0m'

tree.create_node('Profit', 'profit') 
tree.create_node(f'{red}Revenue{reset}',  'revenue'   , parent='profit')
tree.create_node(f'{blue}Buyers{reset}', 'buyers_revenue' , parent='revenue')
tree.create_node(f'{green}Conversion Rate{reset}', 'conversion rate' , parent='buyers_revenue')

tree.create_node(f'{blue}Users{reset}', 'users_buyers_revenue' , parent='buyers_revenue')

tree.create_node('Source', 'source_users_buyers_revenue' , parent='users_buyers_revenue')
tree.create_node(f'{blue}Stage{reset}', 'stage_users_buyers_revenue' , parent='users_buyers_revenue')
tree.create_node('SLA', 'sla_users_buyers_revenue' , parent='users_buyers_revenue')
tree.create_node(f'{blue}Created Time{reset}', 'created time_users_buyers_revenue' , parent='users_buyers_revenue')

tree.create_node(f'{green}Lifetime Value{reset}', 'lifetime value' , parent='revenue')
tree.create_node(f'{blue}Users{reset}', 'users_ltv' , parent='lifetime value')
tree.create_node(f'{red}Gross Revenue{reset}', 'gross revenue' , parent='lifetime value')

tree.create_node(f'{green}Customer Lifetime Value{reset}', 'customer lifetime value' , parent='lifetime value')
tree.create_node('Retention', 'retention' , parent='customer lifetime value')
tree.create_node(f'{green}NPS{reset}', 'nps' , parent='retention')

tree.create_node(f'{green}Average Order Value{reset}', 'average order value' , parent='lifetime value')
tree.create_node(f'{blue}Buyers{reset}', 'buyers_aov' , parent='average order value')
tree.create_node(f'{green}Conversion Rate{reset}', 'conversion rate_ltv' , parent='average order value')
tree.create_node('Offer Total Amount', 'offer total amount' , parent='average order value')
tree.create_node('Initial Amount Paid', 'initial amount paid' , parent='offer total amount')
tree.create_node('Payment Type', 'payment type', parent='offer total amount')

tree.create_node(f'{green}Average Purchases per Customer{reset}', 'average purchases per customer' , parent='lifetime value')
tree.create_node(f'{blue}Buyers{reset}', 'buyers_apc' , parent='average purchases per customer')
tree.create_node(f'{blue}Total Deals{reset}', 'total deals' , parent='average purchases per customer')

tree.create_node(f'{red}Marketing Budget{reset}',  'marketing budget', parent='profit')
tree.create_node(f'{green}Customer Acquisition Cost{reset}',  'customer acquisition cost', parent='marketing budget')
tree.create_node(f'{green}Conversion Rate{reset}',  'conversion rate_ac', parent='customer acquisition cost')
tree.create_node(f'{blue}Buyers{reset}', 'buyers_ac' , parent='marketing budget')
tree.create_node(f'{green}Conversion Rate{reset}',  'conversion rate_buyers_ac', parent='buyers_ac')
tree.create_node(f'{green}Users{reset}',  'users_buyers_ac', parent='buyers_ac')
tree.create_node(f'{green}Cost Per Acquisition{reset}',  'cost per acquisition', parent='users_buyers_ac')
tree.create_node('Campaign',  'campaign', parent='cost per acquisition')
tree.create_node('Term',  'term', parent='campaign')
tree.create_node('Content',  'content', parent='term')
tree.create_node('Source',  'Source', parent='cost per acquisition')
tree.create_node(f'{blue}Clicks{reset}',  'clicks', parent='cost per acquisition')
tree.create_node('Cost Per Click',  'cost per click', parent='clicks')
tree.create_node(f'{blue}Click-through rate{reset}',  'ctr', parent='clicks')

tree.show()

print('red - financial metrics, green - decision-making metrics (unit-economics), blue - atomic metrics')

Profit
├── [31mMarketing Budget[0m
│   ├── [32mCustomer Acquisition Cost[0m
│   │   └── [32mConversion Rate[0m
│   └── [34mBuyers[0m
│       ├── [32mConversion Rate[0m
│       └── [32mUsers[0m
│           └── [32mCost Per Acquisition[0m
│               ├── [34mClicks[0m
│               │   ├── [34mClick-through rate[0m
│               │   └── Cost Per Click
│               ├── Campaign
│               │   └── Term
│               │       └── Content
│               └── Source
└── [31mRevenue[0m
    ├── [32mLifetime Value[0m
    │   ├── [31mGross Revenue[0m
    │   ├── [32mAverage Order Value[0m
    │   │   ├── [32mConversion Rate[0m
    │   │   ├── [34mBuyers[0m
    │   │   └── Offer Total Amount
    │   │       ├── Initial Amount Paid
    │   │       └── Payment Type
    │   ├── [32mAverage Purchases per Customer[0m
    │   │   ├── [34mBuyers[0m
    │   │   └── [34mTotal Deals[0m
    │   ├── [32mCustomer Lifetime Value[0m
    │   │   └── Retention

## Task 4. Understand which product metric they will affect and form hypotheses.

#### Based on business goals & tree of metrics, conversion rate, customer acquisition cost & average order value were chosen as important metrics for optimization. 


#### Conversion Rate,%:
##### Case 1:
- ###### Hypothesis 0: adding customer reviews and ratings to the product page will not affect conversion
- ###### Hypothesis: adding customer feedback and rating to the product page will increase trust and lead to conversion growth of 5%

#### Conversion Rate,%:
##### Case 2:
- ###### Hypothesis 0: segmentation not_qualified leads and offer them relevant offers will not affect the conversion
- ###### Hypothesis: segmentation not_qualified leads and offer them relevant offers will increase conversion by 2%

#### Conversion Rate,%:
##### Case 3:
- ###### Hypothesis 0: providing a trial lesson will not affect the conversion
- ###### Hypothesis: providing a trial lesson will evaluate the quality of training and increase conversion by 8%

##### Customer Acquisition Cost: 
##### Case 4:
- ###### Hypothesis 0: introduction of referal program will not affect the customer acquisition cost
- ###### Hypothesis: introduction of referal program will increase the number of organically generated clients, which will reduce the customer acquisition cost by 5%.

##### Customer Acquisition Cost
##### Case 5:
- ###### Hypothesis 0: redistributing 20% of the budget from less efficient advertising sources & campaigns to more efficient ones not affect the cost of attracting customers
- ###### Hypothesis: redistributing 20% of the budget from less efficient advertising sources & campaigns to more efficient ones the customer acquisition cost by 5%

##### Customer Acquisition Cost
##### Case 6:
- ###### Hypothesis 0: targeting more relevant audiences with using more relevant words in term, such as who has already visited our site, will not affect the customer acquisition cost
- ###### Hypothesis: targeting more relevant audiences with using more relevant words in term, will reduce the customer acquisition cost by 5%

##### Average Order Value
##### Case 7:
- ###### Hypothesis 0: offering additional products will not affect the average order check
- ###### Hypothesis: offering additional products (extra support, additional short courses) for an additional small fee (up to 1000 euros) will increase the number of products in the basket and increase the average check by 3%

##### Average Order Value
##### Case 8:
- ###### Hypothesis 0: introduction of discounts on the purchase of a second course will not affect the average check order
- ###### Hypothesis: introduction of discounts on the purchase of a second rate will increase the average check by 2% (although it is more likely to decrease the case as a consequence)
  
##### Average Order Value
##### Case 9:
- ###### Hypothesis 0: combining several courses in one offer with a discount will not affect the average check order
- ###### Hypothesis: combining several courses in one offer with a discount will increase the average check by 7%

## Task 5. Describe the hypothesis testing method with the formulation of the hypothesis condition.

### Hypothesis check

#### Conversion Rate,%:
##### Case 1:
- ###### Hypothesis 0: adding customer reviews and ratings to the product page will not affect conversion
- ###### Hypothesis: adding customer reviews and rating to the product page will increase trust and lead to conversion growth of 5%
 ###### 
- ###### Group A: do not see reviews and ratings to the product page 
- ###### Group B: see reviews and ratings to the product page
- ###### Random distribution of clients into control (А) and test (B) groups. Moreover, both groups have the most similar characteristics
 ######
- ###### During the test run, the conversion rates are collected and then a t-test is performed to confirm or deny the hypothesis.

In [7]:
import numpy as np
import statsmodels.stats.power as smp
from statsmodels.stats.proportion import proportion_effectsize
from scipy import stats

def calculate_sample_size(p1, p2, alpha=0.05, power=0.80):
    z_alpha = 1.96 # standard value
    z_beta = 0.84 # standard value

    n = ((z_alpha * np.sqrt(p1 * (1 - p1)) + z_beta * np.sqrt(p2 * (1 - p2))) ** 2) / ((p2 - p1) ** 2)
    
    return np.ceil(n)

mde = 0.05
base_conversion_A = 0.063
power_test = 0.80 # standard value
alpha_test = 0.05 # standard value

required_n = calculate_sample_size(base_conversion_A, base_conversion_A + mde, alpha=alpha_test, power=power_test)

deals['Created_Time'] = pd.to_datetime(deals['Created_Time'])
daily_traffic = deals.groupby(deals['Created_Time'].dt.date)['Contact_Name'].nunique()
average_daily_traffic = daily_traffic.mean()

days_required = np.ceil(2*required_n/ average_daily_traffic)

print(f'Required sample size for group: {int(required_n)}')
print(f'Test duration: {days_required}')

group_A = np.random.choice([0, 1], size=int(required_n), p=[1 - base_conversion_A, base_conversion_A])

conversion_B = base_conversion_A + mde
group_B = np.random.choice([0, 1], size=int(required_n), p=[1 - conversion_B, conversion_B])

t_stat, p_value = stats.ttest_ind(group_A, group_B, equal_var=False)

print('\nT-test result')
print(f'Group A conversion (real): {np.mean(group_A):.2%}')
print(f'Group B conversion (real): {np.mean(group_B):.2%}')
print(f'p-value: {p_value:.3f}')

if p_value < alpha_test:
    print('\nThe result is statistically significant (p-value < 0.05)')
    print('The zero hypothesis is rejected. The difference in conversion is not accidental')
else:
    print('\nResult is not statistically significant (p-value >= 0.05).')
    print('The zero hypothesis is accepted. The difference in conversion is accidental')

Required sample size for group: 221
Test duration: 13.0

T-test result
Group A conversion (real): 7.69%
Group B conversion (real): 11.31%
p-value: 0.195

Result is not statistically significant (p-value >= 0.05).
The zero hypothesis is accepted. The difference in conversion is accidental


##### Customer Acquisition Cost: 
##### Case 4:
- ###### Hypothesis 0: introduction of referal program will not affect the customer acquisition cost
- ###### Hypothesis: introduction of referal program will increase the number of organically generated clients, which will reduce the customer acquisition cost by 5%.
  ###### 
- ###### Group A: do not participate in referal program
- ###### Group B: receive offer to participate in referal program
- ###### Random distribution of clients into control (А) and test (B) groups. Moreover, both groups have the most similar characteristics
  ######
- ###### During the test run, the conversion rates are collected and then a t-test is performed to confirm or deny the hypothesis.

In [8]:
import numpy as np
import statsmodels.stats.power as smp
from statsmodels.stats.proportion import proportion_effectsize
from scipy import stats

def calculate_sample_size(p1, p2, alpha=0.05, power=0.80):
    z_alpha = 1.96 # standard value
    z_beta = 0.84 # standard value

    n = ((z_alpha * np.sqrt(p1 * (1 - p1)) + z_beta * np.sqrt(p2 * (1 - p2))) ** 2) / ((p2 - p1) ** 2)
    
    return np.ceil(n)

mde = 0.05
base_conversion_A = 0.063
power_test = 0.80 # standard value
alpha_test = 0.05 # standard value

required_n = calculate_sample_size(base_conversion_A, base_conversion_A + mde, alpha=alpha_test, power=power_test)

deals['Created_Time'] = pd.to_datetime(deals['Created_Time'])
daily_traffic = deals.groupby(deals['Created_Time'].dt.date)['Contact_Name'].nunique()
average_daily_traffic = daily_traffic.mean()

days_required = np.ceil(2*required_n/ average_daily_traffic)

print(f'Required sample size for group: {int(required_n)}')
print(f'Test duration: {days_required}')

group_A = np.random.choice([0, 1], size=int(required_n), p=[1 - base_conversion_A, base_conversion_A])

conversion_B = base_conversion_A + mde
group_B = np.random.choice([0, 1], size=int(required_n), p=[1 - conversion_B, conversion_B])

t_stat, p_value = stats.ttest_ind(group_A, group_B, equal_var=False)

print('\nT-test result')
print(f'Group A conversion (real): {np.mean(group_A):.2%}')
print(f'Group B conversion (real): {np.mean(group_B):.2%}')
print(f'p-value: {p_value:.3f}')

if p_value < alpha_test:
    print('\nThe result is statistically significant (p-value < 0.05)')
    print('The zero hypothesis is rejected. The difference in conversion is not accidental')
else:
    print('\nResult is not statistically significant (p-value >= 0.05).')
    print('The zero hypothesis is accepted. The difference in conversion is accidental')

Required sample size for group: 221
Test duration: 13.0

T-test result
Group A conversion (real): 4.52%
Group B conversion (real): 8.14%
p-value: 0.119

Result is not statistically significant (p-value >= 0.05).
The zero hypothesis is accepted. The difference in conversion is accidental


##### Average Order Value
##### Case 7:
- ###### Hypothesis 0: offering additional products will not affect the average order check
- ###### Hypothesis: offering additional products (extra support, additional short courses) for an additional small fee (up to 1000 euros) will increase the number of products in the basket and increase the average check by 3%
 ###### 
- ###### Group A: nothing was offered
- ###### Group B: additional Google AI course for 100 euro was offered 
- ###### Random distribution of clients into control (А) and test (B) groups. Moreover, both groups have the most similar characteristics
 ######
- ###### During the test run, the conversion rates are collected and then a t-test is performed to confirm or deny the hypothesis.

In [9]:
import numpy as np
import statsmodels.stats.power as smp
from statsmodels.stats.proportion import proportion_effectsize
from scipy import stats

def calculate_sample_size(p1, p2, alpha=0.05, power=0.80):
    z_alpha = 1.96 # standard value
    z_beta = 0.84 # standard value

    n = ((z_alpha * np.sqrt(p1 * (1 - p1)) + z_beta * np.sqrt(p2 * (1 - p2))) ** 2) / ((p2 - p1) ** 2)
    
    return np.ceil(n)

mde = 0.03
base_conversion_A = 0.063
power_test = 0.80 # standard value
alpha_test = 0.05 # standard value

required_n = calculate_sample_size(base_conversion_A, base_conversion_A + mde, alpha=alpha_test, power=power_test)

deals['Created_Time'] = pd.to_datetime(deals['Created_Time'])
daily_traffic = deals.groupby(deals['Created_Time'].dt.date)['Contact_Name'].nunique()
average_daily_traffic = daily_traffic.mean()

days_required = np.ceil(2*required_n/ average_daily_traffic)

print(f'Required sample size for group: {int(required_n)}')
print(f'Test duration: {days_required}')

group_A = np.random.choice([0, 1], size=int(required_n), p=[1 - base_conversion_A, base_conversion_A])

conversion_B = base_conversion_A + mde
group_B = np.random.choice([0, 1], size=int(required_n), p=[1 - conversion_B, conversion_B])

t_stat, p_value = stats.ttest_ind(group_A, group_B, equal_var=False)

print('\nT-test result')
print(f'Group A conversion (real): {np.mean(group_A):.2%}')
print(f'Group B conversion (real): {np.mean(group_B):.2%}')
print(f'p-value: {p_value:.3f}')

if p_value < alpha_test:
    print('\nThe result is statistically significant (p-value < 0.05)')
    print('The zero hypothesis is rejected. The difference in conversion is not accidental')
else:
    print('\nResult is not statistically significant (p-value >= 0.05).')
    print('The zero hypothesis is accepted. The difference in conversion is accidental')

Required sample size for group: 577
Test duration: 34.0

T-test result
Group A conversion (real): 6.41%
Group B conversion (real): 8.49%
p-value: 0.179

Result is not statistically significant (p-value >= 0.05).
The zero hypothesis is accepted. The difference in conversion is accidental
