<a href="https://colab.research.google.com/github/yashav181004/OCR/blob/main/A_B_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from scipy import stats
from statsmodels.stats.power import TTestIndPower

def perform_ab_test(df):
    """Compare the conversion outcomes of groups A and B with a t-test.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with the t-statistic and the p-value, both
        rounded to four decimals.
    """
    labels = df['group']
    conversions = df['conversion']
    # equal_var=False selects Welch's variant, which does not assume the
    # two groups share a common variance.
    t_stat, p_val = stats.ttest_ind(
        conversions[labels == 'A'],
        conversions[labels == 'B'],
        equal_var=False,
    )
    return f"T-statistic: {t_stat:.4f}, P-value: {p_val:.4f}"

def calculate_power(df, alpha=0.05):
    """Estimate the statistical power of the observed A/B comparison.

    The effect size is the difference in mean conversion (B minus A)
    divided by the square root of the average of the two sample variances.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.
        alpha: Significance level of the two-sided test.

    Returns:
        A formatted string with the estimated power, rounded to four
        decimals.
    """
    group_a = df[df['group'] == 'A']['conversion']
    group_b = df[df['group'] == 'B']['conversion']
    n_a, n_b = len(group_a), len(group_b)

    pooled_sd = np.sqrt((group_a.var() + group_b.var()) / 2)
    effect_size = (group_b.mean() - group_a.mean()) / pooled_sd

    # statsmodels defines ratio as nobs2 / nobs1. Deriving it from the data
    # keeps the estimate correct when the groups have different sizes; it
    # still evaluates to 1.0 for balanced groups.
    ratio = n_b / n_a if n_a else 1.0

    analysis = TTestIndPower()
    power = analysis.power(
        effect_size=effect_size,
        nobs1=n_a,
        alpha=alpha,
        ratio=ratio,
        alternative='two-sided',
    )
    return f"Estimated Power: {power:.4f}"

def calculate_roi(df):
    """Summarise average revenue per group and the relative uplift of B.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``revenue`` column.

    Returns:
        A formatted string with the mean revenue of each group and the
        percentage uplift of B over A. The uplift is reported as ``inf``
        when the mean revenue of A is exactly zero.
    """
    in_a = df['group'] == 'A'
    in_b = df['group'] == 'B'
    revenue_a = df[in_a]['revenue'].mean()
    revenue_b = df[in_b]['revenue'].mean()

    if revenue_a != 0:
        uplift = (revenue_b - revenue_a) / revenue_a * 100
    else:
        # A zero baseline has no meaningful relative change.
        uplift = float('inf')

    return f"Avg Revenue A: ${revenue_a:.2f}, Avg Revenue B: ${revenue_b:.2f}, Uplift: {uplift:.2f}%"

def calculate_cumulative_metrics(df):
    """Compute running totals of the numeric columns for each group.

    Only numeric and boolean columns are accumulated. Summing the string
    ``group`` label would concatenate the labels (or fail, depending on
    the column dtype), so non-numeric columns are left out of the result.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``).

    Returns:
        A dict with keys ``'cumulative_a'`` and ``'cumulative_b'``, each a
        DataFrame of cumulative sums over that group's rows, in their
        original order.
    """
    def _running_totals(label):
        # Keep the original row order within the group so the cumulative
        # series follows the order of the input.
        rows = df[df['group'] == label]
        # NOTE(review): identifier-like numeric columns (e.g. a user id)
        # are still accumulated; drop them upstream if that is unwanted.
        return rows.select_dtypes(include=['number', 'bool']).cumsum()

    return {
        'cumulative_a': _running_totals('A'),
        'cumulative_b': _running_totals('B'),
    }

def calculate_conversion_rate(df):
    """Report the conversion rate of each group as a percentage.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with both rates, rounded to two decimals.
    """
    labels = df['group']
    # The mean of the conversion column, scaled to a percentage.
    conversion_rate_a = df[labels == 'A']['conversion'].mean() * 100
    conversion_rate_b = df[labels == 'B']['conversion'].mean() * 100
    return f"Conversion Rate A: {conversion_rate_a:.2f}%, Conversion Rate B: {conversion_rate_b:.2f}%"

def calculate_sample_size(effect_size, alpha=0.05, power=0.8):
    """Solve for the per-group sample size of a balanced two-sided t-test.

    Args:
        effect_size: Standardised effect size to detect.
        alpha: Significance level of the test.
        power: Desired statistical power.

    Returns:
        A formatted string with the required sample size per group,
        rounded up to the next whole observation.
    """
    solver = TTestIndPower()
    sample_size = solver.solve_power(
        effect_size=effect_size,
        alpha=alpha,
        power=power,
        ratio=1.0,
        alternative='two-sided',
    )
    # Round up: a fractional observation cannot be collected.
    rounded_up = int(np.ceil(sample_size))
    return f"Required Sample Size per Group: {rounded_up}"

def calculate_confidence_intervals(df, confidence=0.95):
    """Compute a t-based confidence interval for each group's mean conversion.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.
        confidence: Confidence level of the interval.

    Returns:
        A dict with keys ``'confidence_interval_a'`` and
        ``'confidence_interval_b'``, each holding the ``(lower, upper)``
        pair returned by ``scipy.stats.t.interval``.
    """
    labels = df['group']
    samples = (
        ('confidence_interval_a', df[labels == 'A']['conversion']),
        ('confidence_interval_b', df[labels == 'B']['conversion']),
    )
    intervals = {}
    for key, sample in samples:
        # Student-t interval centred on the sample mean, with n - 1 degrees
        # of freedom and the standard error of the mean as scale.
        intervals[key] = stats.t.interval(
            confidence,
            len(sample) - 1,
            loc=sample.mean(),
            scale=stats.sem(sample),
        )
    return intervals
def calculate_effect_size(df):
    """Compute the standardised difference in mean conversion (B minus A).

    The difference in means is divided by the square root of the average
    of the two sample variances.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with the effect size, rounded to four decimals.
    """
    labels = df['group']
    sample_a = df[labels == 'A']['conversion']
    sample_b = df[labels == 'B']['conversion']

    mean_gap = sample_b.mean() - sample_a.mean()
    pooled_sd = np.sqrt((sample_a.var() + sample_b.var()) / 2)
    effect_size = mean_gap / pooled_sd
    return f"Effect Size: {effect_size:.4f}"
def calculate_median_conversion(df):
    """Report the median of the conversion column for each group.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with both medians, rounded to four decimals.
    """
    by_label = {
        label: df[df['group'] == label]['conversion']
        for label in ('A', 'B')
    }
    median_a = np.median(by_label['A'])
    median_b = np.median(by_label['B'])
    return f"Median Conversion A: {median_a:.4f}, Median Conversion B: {median_b:.4f}"

def calculate_variance(df):
    """Report the sample variance of the conversion column for each group.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with both variances, rounded to four decimals.
    """
    labels = df['group']
    sample_a = df[labels == 'A']['conversion']
    sample_b = df[labels == 'B']['conversion']
    # ddof=1 gives the unbiased (n - 1) sample variance.
    variance_a, variance_b = (np.var(sample, ddof=1) for sample in (sample_a, sample_b))
    return f"Variance A: {variance_a:.4f}, Variance B: {variance_b:.4f}"

def calculate_standard_deviation(df):
    """Report the sample standard deviation of conversion for each group.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with both standard deviations, rounded to four
        decimals.
    """
    def _sample_std(label):
        # ddof=1 gives the (n - 1) sample standard deviation.
        return np.std(df[df['group'] == label]['conversion'], ddof=1)

    std_dev_a = _sample_std('A')
    std_dev_b = _sample_std('B')
    return f"Standard Deviation A: {std_dev_a:.4f}, Standard Deviation B: {std_dev_b:.4f}"

In [None]:
import plotly.express as px

def create_conversion_chart(df):
    """Build a bar chart of the mean conversion per group.

    Args:
        df: DataFrame with ``group`` and ``conversion`` columns.

    Returns:
        The Plotly Express bar figure, one bar per group.
    """
    per_group = df.groupby('group')['conversion'].mean()
    chart_data = per_group.reset_index()
    # Rename the columns so the axes get human-readable titles.
    chart_data.columns = ['Group', 'Conversion Rate']
    return px.bar(
        chart_data,
        x='Group',
        y='Conversion Rate',
        title="Conversion Rate by Group",
    )

In [None]:
import base64
import io
import pandas as pd

def load_data(contents):
    """Decode an uploaded ``"<header>,<base64 data>"`` string into a DataFrame.

    Args:
        contents: Upload payload string. Everything before the first comma
            is treated as a header and ignored; the remainder is
            base64-encoded CSV text.

    Returns:
        The DataFrame parsed from the decoded CSV text.

    Raises:
        ValueError: If ``contents`` has no comma separator, the payload is
            not valid UTF-8, or pandas cannot parse the CSV.
    """
    # Split on the first comma only, so a comma inside the header cannot
    # break the unpacking; the base64 alphabet itself contains no commas.
    _header, separator, payload = contents.partition(',')
    if not separator:
        raise ValueError("Upload contents must look like '<header>,<base64 data>'")
    decoded = base64.b64decode(payload)
    # utf-8-sig drops a leading byte-order mark (common in spreadsheet CSV
    # exports) so it cannot leak into the first column name.
    return pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))

In [None]:
import pandas as pd
import io

# Inline sample dataset, kept as CSV text so the notebook runs without an
# external file. Columns: user_id, group, conversion, revenue.
# Rows with user_id 1-30 are group A and rows 31-50 are group B, so the
# two groups are unequal in size (30 vs 20).
csv_data = """user_id,group,conversion,revenue
1,A,1,30
2,A,0,0
3,A,1,50
4,A,1,20
5,A,0,0
6,A,1,45
7,A,0,0
8,A,0,0
9,A,1,40
10,A,1,35
11,A,0,0
12,A,1,25
13,A,0,0
14,A,0,0
15,A,1,55
16,A,1,33
17,A,0,0
18,A,1,38
19,A,1,42
20,A,0,0
21,A,0,0
22,A,1,60
23,A,1,39
24,A,0,0
25,A,0,0
26,A,1,27
27,A,0,0
28,A,1,49
29,A,1,31
30,A,0,0
31,B,1,70
32,B,1,45
33,B,0,0
34,B,1,60
35,B,0,0
36,B,1,55
37,B,1,63
38,B,0,0
39,B,1,48
40,B,0,0
41,B,0,0
42,B,1,53
43,B,1,50
44,B,0,0
45,B,1,62
46,B,0,0
47,B,0,0
48,B,1,57
49,B,1,66
50,B,0,0
"""

# Parse the CSV text into a DataFrame; StringIO lets read_csv treat the
# in-memory string as a file.
df = pd.read_csv(io.StringIO(csv_data))

# You can now proceed with using the 'df' DataFrame
# For example, print the head to verify:
# print(df.head())

In [None]:
!pip install dash dash-core-components dash-html-components dash-bootstrap-components plotly pandas

Collecting dash
  Downloading dash-3.2.0-py3-none-any.whl.metadata (10 kB)
Collecting dash-core-components
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-html-components
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-bootstrap-components
  Downloading dash_bootstrap_components-2.0.3-py3-none-any.whl.metadata (18 kB)
Collecting retrying (from dash)
  Downloading retrying-1.4.2-py3-none-any.whl.metadata (5.5 kB)
Downloading dash-3.2.0-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Downloading dash_bootstrap_components-2.0.3-py3-none-any.whl (203 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.7/203.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?2

In [None]:
import dash
from dash import dcc, html, Input, Output, State
import pandas as pd
# Assuming data_loader, analysis, and visualization are local modules containing the functions
# If they are in the same notebook, you don't need these imports and can remove them.
# from data_loader import load_data
# from analysis import perform_ab_test, calculate_power, calculate_roi
# from visualization import create_conversion_chart

# Dash application object; the title is passed to the constructor rather
# than assigned afterwards.
app = dash.Dash(__name__, title="A/B Testing Analysis Platform")

# Page layout: a heading, a single-file CSV upload area, four placeholder
# containers filled in by the upload callback, and the conversion chart.
app.layout = html.Div(children=[
    html.H1(children="A/B Testing Dashboard"),
    dcc.Upload(
        id='upload-data',
        multiple=False,
        children=html.Div(children=[
            'Drag and Drop or ',
            html.A(children='Select CSV File'),
        ]),
        style={
            'width': '50%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
    ),
    html.Div(id='output-data-upload'),
    html.Div(id='test-result'),
    html.Div(id='power-analysis'),
    html.Div(id='roi-analysis'),
    dcc.Graph(id='conversion-chart'),
])

# Define the data loading function within the notebook for simplicity if not using external files
import base64
import io

def load_data(contents):
    """Decode an uploaded ``"<header>,<base64 data>"`` string into a DataFrame.

    Args:
        contents: Upload payload string. Everything before the first comma
            is treated as a header and ignored; the remainder is
            base64-encoded CSV text.

    Returns:
        The DataFrame parsed from the decoded CSV text.

    Raises:
        ValueError: If ``contents`` has no comma separator, the payload is
            not valid UTF-8, or pandas cannot parse the CSV.
    """
    # Split on the first comma only, so a comma inside the header cannot
    # break the unpacking; the base64 alphabet itself contains no commas.
    _header, separator, payload = contents.partition(',')
    if not separator:
        raise ValueError("Upload contents must look like '<header>,<base64 data>'")
    decoded = base64.b64decode(payload)
    # utf-8-sig drops a leading byte-order mark (common in spreadsheet CSV
    # exports) so it cannot leak into the first column name.
    return pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))

# Define the analysis functions within the notebook for simplicity if not using external files
import numpy as np
from scipy import stats
from statsmodels.stats.power import TTestIndPower

def perform_ab_test(df):
    """Compare the conversion outcomes of groups A and B with a t-test.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with the t-statistic and the p-value, both
        rounded to four decimals.
    """
    labels = df['group']
    conversions = df['conversion']
    # equal_var=False selects Welch's variant, which does not assume the
    # two groups share a common variance.
    t_stat, p_val = stats.ttest_ind(
        conversions[labels == 'A'],
        conversions[labels == 'B'],
        equal_var=False,
    )
    return f"T-statistic: {t_stat:.4f}, P-value: {p_val:.4f}"

def calculate_power(df, alpha=0.05):
    """Estimate the statistical power of the observed A/B comparison.

    The effect size is the difference in mean conversion (B minus A)
    divided by the square root of the average of the two sample variances.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.
        alpha: Significance level of the two-sided test.

    Returns:
        A formatted string with the estimated power, rounded to four
        decimals.
    """
    group_a = df[df['group'] == 'A']['conversion']
    group_b = df[df['group'] == 'B']['conversion']
    n_a, n_b = len(group_a), len(group_b)

    pooled_sd = np.sqrt((group_a.var() + group_b.var()) / 2)
    effect_size = (group_b.mean() - group_a.mean()) / pooled_sd

    # statsmodels defines ratio as nobs2 / nobs1. Deriving it from the data
    # keeps the estimate correct when the groups have different sizes; it
    # still evaluates to 1.0 for balanced groups.
    ratio = n_b / n_a if n_a else 1.0

    analysis = TTestIndPower()
    power = analysis.power(
        effect_size=effect_size,
        nobs1=n_a,
        alpha=alpha,
        ratio=ratio,
        alternative='two-sided',
    )
    return f"Estimated Power: {power:.4f}"

def calculate_roi(df):
    """Summarise average revenue per group and the relative uplift of B.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``revenue`` column.

    Returns:
        A formatted string with the mean revenue of each group and the
        percentage uplift of B over A. The uplift is reported as ``inf``
        when the mean revenue of A is exactly zero.
    """
    in_a = df['group'] == 'A'
    in_b = df['group'] == 'B'
    revenue_a = df[in_a]['revenue'].mean()
    revenue_b = df[in_b]['revenue'].mean()

    if revenue_a != 0:
        uplift = (revenue_b - revenue_a) / revenue_a * 100
    else:
        # A zero baseline has no meaningful relative change.
        uplift = float('inf')

    return f"Avg Revenue A: ${revenue_a:.2f}, Avg Revenue B: ${revenue_b:.2f}, Uplift: {uplift:.2f}%"

def calculate_cumulative_metrics(df):
    """Compute running totals of the numeric columns for each group.

    Only numeric and boolean columns are accumulated. Summing the string
    ``group`` label would concatenate the labels (or fail, depending on
    the column dtype), so non-numeric columns are left out of the result.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``).

    Returns:
        A dict with keys ``'cumulative_a'`` and ``'cumulative_b'``, each a
        DataFrame of cumulative sums over that group's rows, in their
        original order.
    """
    def _running_totals(label):
        # Keep the original row order within the group so the cumulative
        # series follows the order of the input.
        rows = df[df['group'] == label]
        # NOTE(review): identifier-like numeric columns (e.g. a user id)
        # are still accumulated; drop them upstream if that is unwanted.
        return rows.select_dtypes(include=['number', 'bool']).cumsum()

    return {
        'cumulative_a': _running_totals('A'),
        'cumulative_b': _running_totals('B'),
    }

def calculate_conversion_rate(df):
    """Report the conversion rate of each group as a percentage.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with both rates, rounded to two decimals.
    """
    labels = df['group']
    # The mean of the conversion column, scaled to a percentage.
    conversion_rate_a = df[labels == 'A']['conversion'].mean() * 100
    conversion_rate_b = df[labels == 'B']['conversion'].mean() * 100
    return f"Conversion Rate A: {conversion_rate_a:.2f}%, Conversion Rate B: {conversion_rate_b:.2f}%"

def calculate_sample_size(effect_size, alpha=0.05, power=0.8):
    """Solve for the per-group sample size of a balanced two-sided t-test.

    Args:
        effect_size: Standardised effect size to detect.
        alpha: Significance level of the test.
        power: Desired statistical power.

    Returns:
        A formatted string with the required sample size per group,
        rounded up to the next whole observation.
    """
    solver = TTestIndPower()
    sample_size = solver.solve_power(
        effect_size=effect_size,
        alpha=alpha,
        power=power,
        ratio=1.0,
        alternative='two-sided',
    )
    # Round up: a fractional observation cannot be collected.
    rounded_up = int(np.ceil(sample_size))
    return f"Required Sample Size per Group: {rounded_up}"

def calculate_confidence_intervals(df, confidence=0.95):
    """Compute a t-based confidence interval for each group's mean conversion.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.
        confidence: Confidence level of the interval.

    Returns:
        A dict with keys ``'confidence_interval_a'`` and
        ``'confidence_interval_b'``, each holding the ``(lower, upper)``
        pair returned by ``scipy.stats.t.interval``.
    """
    labels = df['group']
    samples = (
        ('confidence_interval_a', df[labels == 'A']['conversion']),
        ('confidence_interval_b', df[labels == 'B']['conversion']),
    )
    intervals = {}
    for key, sample in samples:
        # Student-t interval centred on the sample mean, with n - 1 degrees
        # of freedom and the standard error of the mean as scale.
        intervals[key] = stats.t.interval(
            confidence,
            len(sample) - 1,
            loc=sample.mean(),
            scale=stats.sem(sample),
        )
    return intervals
def calculate_effect_size(df):
    """Compute the standardised difference in mean conversion (B minus A).

    The difference in means is divided by the square root of the average
    of the two sample variances.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with the effect size, rounded to four decimals.
    """
    labels = df['group']
    sample_a = df[labels == 'A']['conversion']
    sample_b = df[labels == 'B']['conversion']

    mean_gap = sample_b.mean() - sample_a.mean()
    pooled_sd = np.sqrt((sample_a.var() + sample_b.var()) / 2)
    effect_size = mean_gap / pooled_sd
    return f"Effect Size: {effect_size:.4f}"
def calculate_median_conversion(df):
    """Report the median of the conversion column for each group.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with both medians, rounded to four decimals.
    """
    by_label = {
        label: df[df['group'] == label]['conversion']
        for label in ('A', 'B')
    }
    median_a = np.median(by_label['A'])
    median_b = np.median(by_label['B'])
    return f"Median Conversion A: {median_a:.4f}, Median Conversion B: {median_b:.4f}"

def calculate_variance(df):
    """Report the sample variance of the conversion column for each group.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with both variances, rounded to four decimals.
    """
    labels = df['group']
    sample_a = df[labels == 'A']['conversion']
    sample_b = df[labels == 'B']['conversion']
    # ddof=1 gives the unbiased (n - 1) sample variance.
    variance_a, variance_b = (np.var(sample, ddof=1) for sample in (sample_a, sample_b))
    return f"Variance A: {variance_a:.4f}, Variance B: {variance_b:.4f}"

def calculate_standard_deviation(df):
    """Report the sample standard deviation of conversion for each group.

    Args:
        df: DataFrame with a ``group`` column (labels ``'A'`` / ``'B'``)
            and a ``conversion`` column.

    Returns:
        A formatted string with both standard deviations, rounded to four
        decimals.
    """
    def _sample_std(label):
        # ddof=1 gives the (n - 1) sample standard deviation.
        return np.std(df[df['group'] == label]['conversion'], ddof=1)

    std_dev_a = _sample_std('A')
    std_dev_b = _sample_std('B')
    return f"Standard Deviation A: {std_dev_a:.4f}, Standard Deviation B: {std_dev_b:.4f}"

# Define the visualization function within the notebook for simplicity if not using external files
import plotly.express as px

def create_conversion_chart(df):
    """Build a bar chart of the mean conversion per group.

    Args:
        df: DataFrame with ``group`` and ``conversion`` columns.

    Returns:
        The Plotly Express bar figure, one bar per group.
    """
    per_group = df.groupby('group')['conversion'].mean()
    chart_data = per_group.reset_index()
    # Rename the columns so the axes get human-readable titles.
    chart_data.columns = ['Group', 'Conversion Rate']
    return px.bar(
        chart_data,
        x='Group',
        y='Conversion Rate',
        title="Conversion Rate by Group",
    )


@app.callback(
    Output('output-data-upload', 'children'),
    Output('test-result', 'children'),
    Output('power-analysis', 'children'),
    Output('roi-analysis', 'children'),
    Output('conversion-chart', 'figure'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename')
)
def update_output(contents, filename):
    """Refresh every dashboard panel when a file is uploaded.

    Args:
        contents: Value of the upload component's ``contents`` property,
            or ``None`` when nothing has been uploaded yet.
        filename: Name of the uploaded file, shown in the status panel.

    Returns:
        A 5-tuple matching the callback outputs: upload status, t-test
        result, power analysis, ROI analysis and the conversion figure.
        With no upload, or when the file cannot be analysed, the analysis
        panels are empty strings and the figure is an empty dict.
    """
    if contents is None:
        return "", "", "", "", {}

    try:
        df = load_data(contents)
        ab_result = perform_ab_test(df)
        power_result = calculate_power(df)
        roi_result = calculate_roi(df)
        fig = create_conversion_chart(df)
    except (KeyError, TypeError, ValueError) as exc:
        # Bad base64, undecodable text and unparsable CSV surface as
        # ValueError subclasses; a missing column raises KeyError and
        # non-numeric data can raise TypeError. Show the problem to the
        # user instead of letting the callback fail without feedback.
        error_panel = html.Div([
            html.H5(filename),
            html.H6(f"Could not analyse the uploaded file: {exc}"),
        ])
        return error_panel, "", "", "", {}

    return (
        html.Div([html.H5(filename), html.H6("Data Uploaded Successfully")]),
        html.Div([html.H4("A/B Test Result"), html.P(ab_result)]),
        html.Div([html.H4("Power Analysis"), html.P(power_result)]),
        html.Div([html.H4("ROI Analysis"), html.P(roi_result)]),
        fig
    )

if __name__ == "__main__":
    # Start the Dash server in debug mode only when executed as a script.
    # app.run is used here in place of the older app.run_server.
    app.run(debug=True)

<IPython.core.display.Javascript object>