In [1]:
import pandas as pd

import wmfdata as wmf
from wmfdata.utils import (
    insert_code_toggle,
    pd_display_all,
    print_err
)

from trending_articles import LISTS as countries

In [2]:
insert_code_toggle()

In [3]:
def sql_tuple(i):
    """
    Making an SQL 'tuple', for use in an IN clause, is hard. Doing it manually using 
    `", ".join` requires a lot of messing around with quote marks and escaping. Using the
    string representation of a Python tuple *almost* works, but fails when there's just
    one element, because SQL doesn't accept the trailing comma that Python uses.
    What we really want is the string representation of a Python list, but using parentheses
    instead of brackets. This function turns an iterable into just that.
    """
    # Transform other iterables into lists, raising errors for non-iterables
    if type(i) != list:
        i = [x for x in i]
    
    # Don't return empty SQL tuples, since they cause syntax errors 
    if len(i) == 0:
        return None

    list_repr = repr(i)

    return "(" + list_repr[1:-1] + ")"

The experiment was released to the KaiOS store on 8 April, in version 1.6.0 of the app. Based on the data being collected so far, is the experiment and its data collection running correctly?

In [4]:
experiment_uniques = wmf.presto.run("""
WITH refined AS (
  SELECT
    CAST(FROM_ISO8601_TIMESTAMP(meta.dt) AS DATE) AS date,
    event.user_id AS user_id,
    geocoded_data['country'] AS country,
    REDUCE(
      event.tests, 
      NULL, 
      (i, x) -> IF(x.name = '2021-KaiOS-app-homepage-content-suggestions', x."group", i),
      x -> x
    ) AS experiment_group
  FROM event.inukapageview
  WHERE
    geocoded_data['country'] IN ('Nigeria', 'Pakistan', 'Tanzania', 'Uganda')
    AND year = 2021
    AND month = 4
    AND day >= 8
    AND event.app_version = '1.6.0'
    AND event.is_production
), uniques AS (
  SELECT
    ARBITRARY(country) AS country,
    ARBITRARY(experiment_group) AS experiment_group
  FROM refined
  GROUP BY
    user_id
)
SELECT
  country,
  experiment_group,
  COUNT(*) AS devices
FROM uniques
GROUP BY
  country,
  experiment_group
ORDER BY
  country,
  experiment_group
""")

Based on [my power analysis](T271316#6893769), we expected to accumulate about 12,000 devices as experiment participants over a 6 week experiment.

So far, in roughly 28 days, we have accumulated about 15,000, so we seem to be well ahead of schedule. 

Moreover, these numbers meet two other expectations: the users are split quite evenly between the two groups, and all users from our four participating countries are recorded as being in one of the two groups. 

In [5]:
pd.pivot_table(experiment_uniques, index="country", columns="experiment_group", aggfunc="sum", margins=True)

Unnamed: 0_level_0,devices,devices,devices
experiment_group,control,trending-articles,All
country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Nigeria,1453,1390,2843
Pakistan,3557,3381,6938
Tanzania,1609,1622,3231
Uganda,1053,1052,2105
All,7672,7445,15117


In [6]:
# Have any of these uniques "crossed oved" between countries or experiment groups?
crossover_uniques = wmf.presto.run("""
WITH refined AS (
  SELECT
    CAST(FROM_ISO8601_TIMESTAMP(meta.dt) AS DATE) AS date,
    event.user_id AS user_id,
    geocoded_data['country'] AS country,
    REDUCE(
      event.tests, 
      NULL, 
      (i, x) -> IF(x.name = '2021-KaiOS-app-homepage-content-suggestions', x."group", i),
      x -> x
    ) AS experiment_group
  FROM event.inukapageview
  WHERE
    geocoded_data['country'] IN ('Nigeria', 'Pakistan', 'Tanzania', 'Uganda')
    AND year = 2021
    AND month = 4
    AND day >= 8
    AND event.app_version = '1.6.0'
    AND event.is_production
)
SELECT
    user_id,
    SET_AGG(country) AS distinct_countries,
    SET_AGG(experiment_group) AS distinct_groups
FROM refined
GROUP BY user_id
HAVING (
    COUNT(DISTINCT country) > 1
    OR COUNT(DISTINCT experiment_group) > 1
)
""")

None of these users was recorded in more than one country (which should be extremely rare). A single one was recorded in both experiment groups. This should never occur, but since it's just one, it won't affect the results.

In [8]:
crossover_uniques.assign(
    user_id=lambda df: df["user_id"].str.slice_replace(0, 16, repl="X"*16)
)

Unnamed: 0,user_id,distinct_countries,distinct_groups
0,XXXXXXXXXXXXXXXXa768,[Pakistan],"[trending-articles, control]"


We intentionally do not track the names of the pages read together with the user ID, so we cannot check the groupwise reading patterns to verify that the users in the trending articles group are actually seeing the recommendations.

However, we do track the aggregate number of views to specific pages by various client _types_, so we can check the overall reading patterns of KaiOS app users in these countries to verify that there is some increase in pageviews to top recommended articles.

In [9]:
country_mapping = pd.DataFrame(
    [(x[0], y) for x in countries.items() for y in x[1]], 
    columns=["recommendation_country", "viewing_country"]
)

top_recommended_articles = (
    pd.read_csv("trending_articles.csv", parse_dates=["date"])
    .query("date >= '2021-04-01' & date < '2021-05-04' & rank == 1")
    .reset_index(drop=True)
    [["date", "country", "article"]]
    .rename({
        "date": "trending_date",
        "country": "recommendation_country"
    }, axis="columns")
    .assign(recommended_date=lambda df: df["trending_date"] + pd.DateOffset(days=1))
    .merge(country_mapping, how="left")
)

In [10]:
relevant_pages = top_recommended_articles["article"].pipe(sql_tuple)
relevant_countries = sql_tuple([l for l in countries.values() for l in l])

relevant_views = wmf.spark.run(f"""
SELECT
    DATE(CONCAT_WS('-', year, LPAD(CAST(month AS STRING), 2, '0'), LPAD(CAST(day AS STRING), 2, '0'))) AS `date`,
    country_code AS viewing_country,
    page_title AS article,
    SUM(view_count) AS views,
    SUM(IF(user_agent_map['os_family'] = 'KaiOS' AND access_method = 'mobile app', view_count, 0)) AS KaiOS_views
FROM wmf.pageview_hourly
WHERE
    page_title IN {relevant_pages}
    AND country_code IN {relevant_countries}
    AND agent_type = 'user'
    AND project = 'en.wikipedia'
    AND year = 2021
    AND month >= 4
GROUP BY
    page_title,
    country_code,
    year,
    month,
    day
""")

PySpark executors will use /usr/lib/anaconda-wmf/bin/python3.


In [16]:
trending_date_relevant_views = (
    relevant_views
    .rename({
        "views": "trending_date_views", 
        "KaiOS_views": "KaiOS_trending_date_views",
        "date": "trending_date"
    }, axis="columns")
    .assign(trending_date=lambda df: pd.to_datetime(df["trending_date"]))
)

top_recommended_articles = pd.merge(
    top_recommended_articles,
    trending_date_relevant_views,
    how="left"
)

recommended_date_relevant_views = (
    relevant_views
    .rename({
        "views": "recommended_date_views", 
        "KaiOS_views": "KaiOS_recommended_date_views",
        "date": "recommended_date"
    }, axis="columns")
    .assign(recommended_date=lambda df: pd.to_datetime(df["recommended_date"]))
)

top_recommended_articles = pd.merge(
    top_recommended_articles,
    recommended_date_relevant_views,
    how="left"
)

There were 108 daily top recommended articles across our 4 experiment country (4 daily for 27 days). Of these, **86** of them had no views on the day they were recommended (as did 62 on them on the day they were trending). Wow.

In [17]:
(
    top_recommended_articles
    .query("viewing_country in ('NG', 'PK', 'TZ', 'UG') & recommended_date >= '2021-04-08'")
    [["KaiOS_trending_date_views", "KaiOS_recommended_date_views"]]
    .applymap(lambda x: x == 0)
    .sum()
    .rename("number_of_0_values")
)

KaiOS_trending_date_views       90
KaiOS_recommended_date_views    86
Name: number_of_0_values, dtype: int64

In [25]:
experiment_pageviews = wmf.presto.run("""
WITH refined AS (
    SELECT
        CAST(FROM_ISO8601_TIMESTAMP(meta.dt) AS DATE) AS "date",
        geocoded_data['country_code'] AS country,
        REDUCE(
          event.tests, 
          NULL, 
          (i, x) -> IF(x.name = '2021-KaiOS-app-homepage-content-suggestions', x."group", i),
          x -> x
        ) AS experiment_group,
        event.pageview_token AS pageview_token
    FROM event.inukapageview
    WHERE
        year = 2021
        AND (month = 4 AND day >= 8 OR month >= 4)
        AND geocoded_data['country_code'] IN ('NG', 'PK', 'TZ', 'UG')
        AND CARDINALITY(FILTER(
            event.tests,
            x -> x.name = '2021-KaiOS-app-homepage-content-suggestions'
        )) > 0
        AND event.is_production
        AND NOT event.is_main_page
        AND NOT event.is_search_page
        -- The recommendations will only be shown if the user is using the app in English
        AND wiki = 'enwiki'
)
SELECT
    date,
    country,
    experiment_group,
    COUNT(DISTINCT pageview_token) AS pageviews
FROM refined
GROUP BY
    date,
    country,
    experiment_group
ORDER BY
    date,
    country,
    experiment_group
""")

daily_experiment_uniques = wmf.presto.run("""
WITH refined AS (
      SELECT
        CAST(FROM_ISO8601_TIMESTAMP(meta.dt) AS DATE) AS "date",
        geocoded_data['country_code'] AS country,
        REDUCE(
          event.tests, 
          NULL, 
          (i, x) -> IF(x.name = '2021-KaiOS-app-homepage-content-suggestions', x."group", i),
          x -> x
        ) AS experiment_group,
        event.user_id AS user_id
    FROM event.inukapageview
    WHERE
        year = 2021
        AND (month = 4 AND day >= 8 OR month >= 4)
        AND geocoded_data['country_code'] IN ('NG', 'PK', 'TZ', 'UG')
        AND CARDINALITY(FILTER(
            event.tests,
            x -> x.name = '2021-KaiOS-app-homepage-content-suggestions'
        )) > 0
        AND event.is_production
        AND NOT event.is_main_page
        AND NOT event.is_search_page
        AND wiki = 'enwiki'
), uniques AS (
    SELECT
        date,
        ARBITRARY(country) AS country,
        ARBITRARY(experiment_group) AS experiment_group
    FROM refined
    GROUP BY
        user_id,
        date
)
SELECT
    date,
    country,
    experiment_group,
    COUNT(*) AS unique_devices
FROM uniques
GROUP BY
    date,
    country,
    experiment_group
ORDER BY
    date,
    country,
    experiment_group
""")

However, we *are* recording pageviews from the users in this experiment; below are the average number of pageviews and unique devices per bucket per country.

In [28]:
print(
    "Average pageviews per bucket per country per day:",
    int(experiment_pageviews.query("date < '2021-05-05'")["pageviews"].mean().round())
)

print(
    "Average unique devices per bucket per country per day:",
    int(daily_experiment_uniques.query("date < '2021-05-05'")["unique_devices"].mean().round())
)

Average pageviews per bucket per country per day: 237
Average unique devices per bucket per country per day: 60


So, this implies the following: on a typical day in one of our countries, there are 60 devices in the trending articles group who together view about 240 pages, but not a _single_ one of them reads the first article on the list. Of course, that's hard to believe.

There are three main possiblities:
1. an app bug preventing the recommendations from showing
2. a data bug preventing the pageviews to the recommended articles from being recorded
3. the app users are not using the recommendations _at all_, possibly because of their somewhat unobtrusive positioning.

To test possibility 2, an engineer in Canada made sure to read all the recommendations (the Ugandan recommendations, served to Canadian devices for testing purposes) on his test device each day from 30 April to 3 May. We can see the effect of this in the pageview data quite clearly.

In [33]:
(
    top_recommended_articles
    .query("viewing_country == 'CA' & recommended_date >= '2021-04-25' & recommended_date < '2021-05-06'")
    [["recommended_date", "KaiOS_recommended_date_views"]]
)

Unnamed: 0,recommended_date,KaiOS_recommended_date_views
243,2021-04-25,0.0
254,2021-04-26,0.0
265,2021-04-27,0.0
276,2021-04-28,0.0
287,2021-04-29,0.0
298,2021-04-30,1.0
309,2021-05-01,1.0
320,2021-05-02,1.0
331,2021-05-03,1.0
342,2021-05-04,0.0
