In [1]:
import wmfdata as wmf

In [30]:
QUERY = """
SELECT
  FIRST(country) AS country,
  LAST(app_version) as app_version,
  AVG(pageviews) AS avg_daily_pageviews,
  COUNT(*) AS days_active
FROM (
  SELECT
    event.user_id AS user_id,
    FIRST(geocoded_data['country']) AS country,
    LAST(event.app_version) AS app_version,
    COUNT(DISTINCT event.pageview_token) AS pageviews
  FROM event.inukapageview
  WHERE
    year = 2021
    AND (
      month = 1 AND day >= 19
      OR month = 2
      OR month = 3 AND day < 2
    )
    AND event.client_type = "kaios-app"
    AND NOT event.is_main_page
    AND geocoded_data['country'] NOT IN ("India", "United States")
  GROUP BY
    event.user_id,
    month, day
) user_days
GROUP BY user_id
"""

avg_daily_pageviews_per_user = wmf.spark.run(QUERY, session_type="yarn-large")

In [31]:
avg_daily_pageviews_per_user.groupby('app_version').size()

app_version
0.0.0          1309
1.0.0           643
1.0.0.1231       20
1.1.0            63
1.2.0         17853
dtype: int64

In [33]:
avg_daily_pageviews_per_user.groupby("country").size().sort_values(ascending=False).head(20)

country
Uganda          3448
Pakistan        3396
Nigeria         2863
Tanzania        2358
Puerto Rico      802
Portugal         694
Cameroon         570
Canada           406
Egypt            347
Mexico           321
Ivory Coast      293
DR Congo         250
Russia           239
Germany          230
South Africa     218
Madagascar       213
Rwanda           188
Zambia           185
Mali             151
Benin            146
dtype: int64

In [35]:
target_users = (
  avg_daily_pageviews_per_user
  .query(
    "country in ('Uganda', 'Pakistan', 'Nigeria', 'Tanzania') &"
    "app_version == '1.2.0'"
  )
)

In [38]:
len(target_users)

12033

In [39]:
target_users['avg_daily_pageviews'].mean()

3.81807134387336

In [28]:
target_users['avg_daily_pageviews'].std()

4.256881261397582