In [2]:
import arviz
import bambi
import matplotlib.pyplot as plt
import pandas as pd
import pymc3 as pm
import scipy.stats as ss

import wmfdata as wmf

In [3]:
# Apply the Matplotlib style supplied by wmfdata's charting module
# (presumably sets plot defaults for the rest of the notebook — TODO confirm).
wmf.charting.set_mpl_style()

In [4]:
# Experiment data, presumably one row per user (TODO confirm); the snapshot
# date is part of the file name.
USER_DATA_PATH = "data/2022-01-06_experiment_user.parquet"
user = pd.read_parquet(USER_DATA_PATH)

# Basic tests

In [5]:
# Split the user table by experiment arm so the two groups can be compared.
control_user = user[user["experiment_group"] == "control"]
trending_articles_user = user[user["experiment_group"] == "trending-articles"]

In [6]:
# Average of the per-user mean session length (minutes) within each arm.
control_mean_session_min, trending_mean_session_min = (
    arm["mean_session_min"].mean()
    for arm in (control_user, trending_articles_user)
)
# Treatment-to-control ratio; subtracting 1 below turns it into a relative lift.
trending_advantage = trending_mean_session_min / control_mean_session_min

print(
    f"Control mean session length: {round(control_mean_session_min, 2)} min",
    f"Trending articles mean session length: {round(trending_mean_session_min, 2)} min",
    f"Trending advantage: {wmf.utils.pct_str(trending_advantage - 1)}",
    sep="\n",
)

Control mean session length: 2.16 min
Trending articles mean session length: 2.51 min
Trending advantage: 16.1%


In [7]:
# Welch's t-test (equal_var=False: no equal-variance assumption) on mean
# session length, control vs. trending-articles. Control is passed first, so
# a negative statistic means the control mean is the smaller one.
ss.ttest_ind(
    control_user["mean_session_min"],
    trending_articles_user["mean_session_min"],
    equal_var=False,
)

Ttest_indResult(statistic=-13.876562249255562, pvalue=9.537009333302198e-44)

In [8]:
# Average number of sessions per user within each arm.
control_mean_sessions, trending_mean_sessions = (
    arm["sessions"].mean()
    for arm in (control_user, trending_articles_user)
)
# Treatment-to-control ratio; subtracting 1 below turns it into a relative lift.
trending_advantage = trending_mean_sessions / control_mean_sessions

print(
    f"Control mean sessions: {round(control_mean_sessions, 3)}",
    f"Trending articles mean sessions: {round(trending_mean_sessions, 3)}",
    f"Trending advantage: {wmf.utils.pct_str(trending_advantage - 1)}",
    sep="\n",
)

Control mean sessions: 2.115
Trending articles mean sessions: 2.181
Trending advantage: 3.1%


In [9]:
# Welch's t-test (equal_var=False: no equal-variance assumption) on the
# number of sessions per user, control vs. trending-articles.
ss.ttest_ind(
    control_user["sessions"],
    trending_articles_user["sessions"],
    equal_var=False,
)

Ttest_indResult(statistic=-2.9531841871417894, pvalue=0.003145790964003159)

In [10]:
# Rank-based (Mann-Whitney U) comparison of sessions per user between the two
# arms, which does not rely on the sample means being normally distributed.
# `alternative` is passed explicitly: SciPy releases before 1.7 fall back to a
# deprecated one-sided p-value (half the two-sided value) when it is omitted,
# which would not be comparable with a two-sided t-test p-value.
ss.mannwhitneyu(
    control_user["sessions"],
    trending_articles_user["sessions"],
    alternative="two-sided",
)

MannwhitneyuResult(statistic=1666602340.0, pvalue=1.5950887309004725e-05)

In [11]:
# Per-user total session time: session count × mean session length (minutes).
# Added to `user` in place, so subsets taken before this point lack the column.
user["total_session_time"] = user["sessions"].mul(user["mean_session_min"])

In [12]:
# Mean sessions per user: one row per experiment arm, one column per country.
(
    user
    .groupby(["experiment_group", "country"])["sessions"]
    .mean()
    .unstack("country")
)

country,NG,PK,TZ,UG
experiment_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
control,2.829459,1.689852,2.169582,2.675613
trending-articles,2.889162,1.678041,2.269816,2.920829


In [13]:
# Mean session length (minutes): one row per experiment arm, one column per
# country.
(
    user
    .groupby(["experiment_group", "country"])["mean_session_min"]
    .mean()
    .unstack("country")
)

country,NG,PK,TZ,UG
experiment_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
control,3.625423,1.479766,2.158127,2.929637
trending-articles,4.050836,1.591182,2.643597,3.642461


## Basic tests, known enwiki users only
Users with their language set to something other than English did not see any recommendations, so ideally we would analyze only those users who we knew consistently used English.

However, since we only started storing the `wiki` field partway through the experiment, these known English users amount to only about one third of our total users, even though the true proportion of consistent English users is about 90%.

In [14]:
# Total number of rows in the user table, for comparison with per-wiki counts.
user.shape[0]

116233

In [15]:
# The 20 most frequent values of the `wiki` column, most common first.
(
    user
    .groupby("wiki")
    .size()
    .sort_values(ascending=False)
    .head(20)
)

wiki
                        54553
enwiki                  45277
-enwiki                  5417
enwiki-                  5197
swwiki                    464
urwiki                    255
swwiki-enwiki             223
enwiki-swwiki             216
enwiki-dewiki             188
-swwiki                   174
swwiki-                   136
enwiki-urwiki             133
urwiki-enwiki             106
frwiki                    100
enwiki-enwiki-dewiki       95
enwiki-enwiki-swwiki       88
swwiki-swwiki-enwiki       82
enwiki-swwiki-enwiki       76
enwiki-dewiki-enwiki       74
swwiki-enwiki-swwiki       72
dtype: int64

In [16]:
# Keep only users whose `wiki` value is exactly "enwiki", then split by arm.
# NOTE(review): the exact match leaves out composite values such as "enwiki-"
# and "-enwiki" that also appear in the column — confirm this is intended.
enwiki_user = user[user["wiki"] == "enwiki"]
enwiki_control_user = enwiki_user[enwiki_user["experiment_group"] == "control"]
enwiki_trending_articles_user = enwiki_user[
    enwiki_user["experiment_group"] == "trending-articles"
]

In [17]:
# Average of the per-user mean session length (minutes) within each arm,
# restricted to known-enwiki users. Reuses (and overwrites) the variable names
# from the all-user comparison.
control_mean_session_min, trending_mean_session_min = (
    arm["mean_session_min"].mean()
    for arm in (enwiki_control_user, enwiki_trending_articles_user)
)
# Treatment-to-control ratio; subtracting 1 below turns it into a relative lift.
trending_advantage = trending_mean_session_min / control_mean_session_min

print(
    f"Control mean session length: {round(control_mean_session_min, 2)} min",
    f"Trending articles mean session length: {round(trending_mean_session_min, 2)} min",
    f"Trending advantage: {wmf.utils.pct_str(trending_advantage - 1)}",
    sep="\n",
)

Control mean session length: 2.01 min
Trending articles mean session length: 2.38 min
Trending advantage: 18.3%


In [53]:
# Welch's t-test (equal_var=False: no equal-variance assumption) on mean
# session length, known-enwiki users only; control is the first sample.
ss.ttest_ind(
    a=enwiki_control_user["mean_session_min"],
    b=enwiki_trending_articles_user["mean_session_min"],
    equal_var=False,
)

Ttest_indResult(statistic=-7.553810404259041, pvalue=4.329201831300405e-14)

In [55]:
# Average number of sessions per user within each arm, restricted to
# known-enwiki users. Reuses (and overwrites) the variable names from the
# all-user comparison.
control_mean_sessions, trending_mean_sessions = (
    arm["sessions"].mean()
    for arm in (enwiki_control_user, enwiki_trending_articles_user)
)
# Treatment-to-control ratio; subtracting 1 below turns it into a relative lift.
trending_advantage = trending_mean_sessions / control_mean_sessions

print(
    f"Control mean sessions: {round(control_mean_sessions, 3)}",
    f"Trending articles mean sessions: {round(trending_mean_sessions, 3)}",
    f"Trending advantage: {wmf.utils.pct_str(trending_advantage - 1)}",
    sep="\n",
)

Control mean sessions: 1.781
Trending articles mean sessions: 1.812
Trending advantage: 1.7%


In [58]:
# Welch's t-test (equal_var=False: no equal-variance assumption) on sessions
# per user, known-enwiki users only; control is the first sample.
ss.ttest_ind(
    a=enwiki_control_user["sessions"],
    b=enwiki_trending_articles_user["sessions"],
    equal_var=False,
)

Ttest_indResult(statistic=-1.4980319059941398, pvalue=0.13413337276886597)

In [59]:
# Rank-based (Mann-Whitney U) comparison of sessions per user between the two
# arms, known-enwiki users only. `alternative` is passed explicitly: SciPy
# releases before 1.7 fall back to a deprecated one-sided p-value (half the
# two-sided value) when it is omitted, which would not be comparable with a
# two-sided t-test p-value.
ss.mannwhitneyu(
    enwiki_control_user["sessions"],
    enwiki_trending_articles_user["sessions"],
    alternative="two-sided",
)

MannwhitneyuResult(statistic=175720176.5, pvalue=0.005344726184725063)