# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import List, Dict
from scipy.stats import beta
class ABTest:
    """Simulate an A/B/n conversion experiment and summarise it with Beta posteriors.

    On construction a synthetic data set is generated (one row per user, each
    user randomly assigned to a variant and converting with that variant's
    probability). ``update_posteriors`` then performs the conjugate
    Beta-Binomial update per variant, ``calculate_losses`` scores the variants
    and ``plot_posteriors`` draws the posterior densities.

    Attributes set by ``__init__``:
        num_variants, variant_list, conversion_probs, num_rows: the arguments.
        df: simulated data with columns ``user_ids``, ``variants``, ``conversions``.
        a_priors, b_priors: Beta prior parameters, one entry per variant
            (uniform ``Beta(1, 1)`` by default).
        posteriors: ``{variant: frozen scipy beta distribution}``; empty until
            ``update_posteriors`` is called.
        losses: ``{variant: score}``; empty until ``calculate_losses`` is called.
    """

    def __init__(self, num_variants: int, variant_list: List[str], conversion_probs: List[float], num_rows: int = 5000):
        """Store the configuration, simulate the data and set uniform priors.

        Args:
            num_variants: Number of variants; determines the length of the prior lists.
            variant_list: Variant labels.
            conversion_probs: True conversion probability of each variant,
                aligned by position with ``variant_list``.
            num_rows: Number of simulated users.

        Raises:
            ValueError: If ``variant_list`` and ``conversion_probs`` differ in length.
        """
        self.num_variants = num_variants
        self.variant_list = variant_list
        self.conversion_probs = conversion_probs
        self.num_rows = num_rows
        self.df = self.generate_ab_data()
        self.a_priors = [1] * num_variants
        self.b_priors = [1] * num_variants
        self.posteriors = {}
        self.losses = {}

    def generate_ab_data(self) -> pd.DataFrame:
        """Simulate one row per user with a random variant and a 0/1 conversion.

        Returns:
            DataFrame with columns ``user_ids`` (1..num_rows), ``variants`` and
            ``conversions`` (0 or 1).

        Raises:
            ValueError: If ``variant_list`` and ``conversion_probs`` differ in length.
        """
        # Validate before touching the RNG or indexing into the lists.
        if len(self.variant_list) != len(self.conversion_probs):
            raise ValueError('variant_list and conversion_probs must have the same length')

        # Seed the global NumPy RNG so the simulated data set is reproducible.
        np.random.seed(123)
        user_ids = np.arange(self.num_rows) + 1
        variants = np.random.choice(self.variant_list, size=self.num_rows)

        # Each user converts with the probability of the variant they were
        # assigned to, i.e. one Bernoulli draw per row with a per-row rate.
        rate_by_variant = dict(zip(self.variant_list, self.conversion_probs))
        row_rates = np.array([rate_by_variant[variant] for variant in variants], dtype=float)
        conversions = np.random.binomial(1, row_rates)

        return pd.DataFrame({'user_ids': user_ids, 'variants': variants, 'conversions': conversions})

    def update_posteriors(self):
        """Compute each variant's Beta posterior from its observed conversions.

        For a variant with prior ``Beta(a, b)``, ``s`` conversions and ``n``
        users, the posterior is ``Beta(a + s, b + n - s)``. Variants that
        received no users keep their prior. The per-variant posterior
        parameters are also written to the ``a_posterior`` / ``b_posterior``
        columns of ``self.df`` (each row carries its own variant's values).

        Raises:
            ValueError: If the prior lists do not have one entry per variant.
        """
        expected = len(self.variant_list)
        if len(self.a_priors) != expected or len(self.b_priors) != expected:
            raise ValueError('a_priors and b_priors must have one entry per variant')

        # Aggregate successes and trials per variant; reindex so that variants
        # without any rows still appear (with zero counts).
        counts = (
            self.df.groupby('variants')['conversions']
            .agg(['sum', 'count'])
            .reindex(self.variant_list, fill_value=0)
        )

        a_post = {}
        b_post = {}
        for variant, successes, trials, a_prior, b_prior in zip(
            self.variant_list, counts['sum'], counts['count'], self.a_priors, self.b_priors
        ):
            a_post[variant] = a_prior + int(successes)
            b_post[variant] = b_prior + int(trials) - int(successes)

        self.df['a_posterior'] = self.df['variants'].map(a_post)
        self.df['b_posterior'] = self.df['variants'].map(b_post)
        self.posteriors = {variant: beta(a_post[variant], b_post[variant]) for variant in a_post}

    def calculate_losses(self):
        """Score every variant against the best competing posterior mean.

        The value stored under ``loss`` is ``1 - cdf(m)`` of the variant's
        posterior, where ``m`` is the largest posterior mean among the *other*
        variants, i.e. the posterior probability that this variant's rate
        exceeds that mean. The column name ``loss`` is kept for compatibility.

        Returns:
            DataFrame with columns ``variant`` and ``loss`` (empty if
            ``update_posteriors`` has not been called).

        Raises:
            ValueError: If exactly one posterior exists, so there is nothing
                to compare against.
        """
        if len(self.posteriors) == 1:
            raise ValueError('at least two variants are required to compare posteriors')

        # Compute every mean once instead of once per compared variant.
        means = {variant: dist.mean() for variant, dist in self.posteriors.items()}

        self.losses = {}
        for variant, dist in self.posteriors.items():
            best_other_mean = max(mean for key, mean in means.items() if key != variant)
            self.losses[variant] = 1 - dist.cdf(best_other_mean)

        return pd.DataFrame(list(self.losses.items()), columns=['variant', 'loss'])

    def plot_posteriors(self):
        """Draw the posterior density of every variant on the current axes.

        Returns:
            The ``matplotlib.pyplot`` module, so the caller can show or save
            the figure.
        """
        x = np.linspace(0, 1, 200)
        for variant, dist in self.posteriors.items():
            plt.plot(x, dist.pdf(x), label=variant)
        plt.legend()
        return plt

# Demo run: three variants with increasing true conversion rates.
ab_test = ABTest(
    num_variants=3,
    variant_list=['A', 'B', 'C'],
    conversion_probs=[0.1, 0.15, 0.2],
    num_rows=5000,
)

# Posteriors must be computed before they can be scored or plotted.
ab_test.update_posteriors()

# Show the per-variant score table, then draw the posterior densities.
print(ab_test.calculate_losses())
ab_test.plot_posteriors()
    """
    losses = {}
    for variant, dist in posteriors.items():
        max_conversion_rate = max([dist.mean() for key, dist in posteriors.items() if key != variant])
        losses[variant] = 1 - dist.cdf(max_conversion_rate)
    losses_df: pd.DataFrame = pd.DataFrame(losses.items(), columns=['variant', 'loss'])

    return losses_df


def visualize_distributions(posteriors: Dict[str, beta]) -> None:
    """
    Plot one labelled density curve per variant and display the figure.

    Args:
        posteriors: Mapping from variant label to a distribution object
            exposing ``pdf``; each entry becomes one curve in the legend.
    """
    # Evaluate every density on the same 200-point grid over [0, 1].
    grid = np.linspace(0, 1, 200)
    for label in posteriors:
        density = posteriors[label].pdf(grid)
        plt.plot(grid, density, label=label)
    plt.legend()
    plt.show()


def main():
    """Run the demo pipeline: generate data, fit posteriors, print losses, plot.

    The helper functions called here are defined elsewhere in the file; this
    function only wires them together with fixed demo settings.
    """
    # Fixed demo configuration: three variants, 5000 simulated rows.
    variant_list = ['A', 'B', 'C']
    conversion_probs = [0.1, 0.15, 0.2]
    num_variants: int = 3
    num_rows: int = 5000

    df: pd.DataFrame = generate_ab_data(
        num_rows,
        num_variants,
        variant_list,
        conversion_probs,
    )

    # One prior parameter of 1 per variant for both a and b.
    a_priors: List[int] = [1 for _ in range(num_variants)]
    b_priors: List[int] = [1 for _ in range(num_variants)]
    posteriors = calculate_posteriors(df, a_priors, b_priors)

    losses_df = calculate_expected_losses(posteriors)
    print(losses_df)

    visualize_distributions(posteriors)


# Only run the demo when executed as a script, not when imported.
if __name__ == "__main__":
    main()

# Output: ModuleNotFoundError: No module named 'numpy'