
# PARLA

## Problem
Implement a function that calculates a CUPED-adjusted metric from two dataframes:
- a dataframe containing metric values recorded during the experiment
- a dataframe containing covariate values

## Action
- extracted covariate values and metric values from input dataframes
- calculated covariance and variance
- calculated theta
- adjusted and rounded metric values

## Result
The implemented function successfully passed all tests.

## Learning
- I revised relevant Python, NumPy, and pandas functionality
- I learned how to perform CUPED adjustment of experimental data

## Application
- I can apply relevant Python, NumPy, and pandas functionality to similar data-related problems
- I can apply CUPED adjustment to real-world A/B-testing data


In [6]:

import numpy as np
import pandas as pd


In [7]:

def calculate_cuped_metric(
        df_metric: pd.DataFrame,
        df_cov: pd.DataFrame
) -> pd.DataFrame:
    """
    Calculates CUPED-adjusted metric values.

    The two frames are inner-joined on 'user_id', so only users present in
    both inputs appear in the result. Each metric value is replaced by
    ``metric - theta * (cov - mean(cov))``, where
    ``theta = cov(cov, metric) / var(cov)``, and rounded to 2 decimals.

    If the covariate has no variation (all values equal, or fewer than two
    matched users), theta is taken as 0 and the metric is returned
    unadjusted instead of turning into NaN.

    :param df_metric: A DataFrame with metric values during the experiment,
        containing columns ['user_id', 'metric'].
    :param df_cov: A DataFrame with covariate values,
        containing columns ['user_id', 'cov'].

    :return: A new DataFrame with CUPED-adjusted metric values,
        containing columns ['user_id', 'metric']. The inputs are not
        modified.
    """

    # align metric and covariate values per user (inner join)
    df = pd.merge(df_metric, df_cov, on='user_id')

    # With fewer than two rows the covariate cannot vary, so skip the
    # variance computation altogether.
    variance = np.var(df['cov']) if len(df) > 1 else 0.0

    if variance > 0:
        # NOTE: np.cov defaults to ddof=1 while np.var defaults to ddof=0,
        # so theta is n / (n - 1) times the textbook ratio. This is kept
        # deliberately: existing expected outputs rely on this convention.
        covariance = np.cov(df['cov'], df['metric'])[0, 1]
        theta = covariance / variance
    else:
        # Zero (or undefined) variance: the covariate carries no
        # information, and dividing would yield NaN for every user.
        theta = 0.0

    # adjust and round metric values
    df['metric'] = df['metric'] - theta * (df['cov'] - np.mean(df['cov']))
    df['metric'] = np.round(df['metric'], 2)
    del df['cov']
    return df


In [8]:

# Smoke test for calculate_cuped_metric(): three users, every one present
# in both frames, compared against a precomputed reference frame.
df_metric = pd.DataFrame(
    {'user_id': [1, 2, 3], 'metric': [2000, 2500, 3000]}
)
df_cov = pd.DataFrame(
    {'user_id': [1, 2, 3], 'cov': [1100, 1500, 0]}
)
df_answer = pd.DataFrame(
    {'user_id': [1, 2, 3], 'metric': [2159.53, 2933.01, 2407.46]}
)
df_result = calculate_cuped_metric(df_metric, df_cov)

# DataFrame.equals requires identical shape, values, and dtypes.
print(
    'test case 01: passed'
    if df_answer.equals(df_result)
    else 'test case 01: failed'
)


test case 01: passed
