In [309]:
import numpy as np
import pandas as pd
import wmfdata as wmf
from wmfdata.utils import pd_display_all, df_to_remarkup

import secrets

In [310]:
# Apply the Matplotlib style settings shipped with wmfdata.
# NOTE(review): no plotting cells are visible in this part of the notebook — confirm this is still needed.
wmf.charting.set_mpl_style()

In [311]:
# Timestamp of this notebook run.
# `pd.datetime` was only an alias of the stdlib `datetime` class; it was deprecated in
# pandas 1.0 and removed in 2.0. `pd.Timestamp` is a `datetime.datetime` subclass, so any
# code that treats TODAY as a datetime keeps working.
TODAY = pd.Timestamp.now()

# Events and errors by client type

In [313]:
# Daily InukaPageView event counts per client type.
# `date` is assembled as a YYYY-MM-DD string from the year/month/day columns
# (month and day left-padded to two digits).
#
# The previous filter `(month = 5 AND day )` was a truncated predicate with no year bound and
# did not match the recorded March 2020 output below.
# NOTE(review): the window now mirrors the `daily_errors_r` query (2020, February onwards) so
# events and errors cover the same days — confirm this is the intended range.
#
# COUNT(*) is deliberately left without an alias: the pivot in the next cell produces a
# two-level column header whose first level is dropped before exporting the table.
daily_events_r = wmf.spark.run("""
SELECT
  CONCAT_WS("-", year, LPAD(month, 2, "0"), LPAD(day, 2, "0")) AS date,
  event.client_type,
  COUNT(*)
FROM event.inukapageview
WHERE
  year = 2020 AND
  month >= 2
GROUP BY
  event.client_type,
  year,
  month,
  day
""")

In [320]:
# Reshape the long (date, client_type, count) result into one row per day and one column
# per client type.
daily_events = (
  daily_events_r
  .assign(date=lambda df: pd.to_datetime(df["date"]))
  .pivot(index="date", columns="client_type")
  # A client type with no events on a given day comes out of the pivot as NaN.
  .fillna(0)
  # Whole-frame cast back to integers; replaces the element-wise (and now deprecated)
  # `.applymap(int)`.
  .astype(int)
)

daily_events.tail(8)

Unnamed: 0_level_0,count(1),count(1),count(1),count(1)
client_type,android-web,ios-web,kaios-app,kaios-web
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2020-03-21,187,1923306,655,10025
2020-03-22,373,2137102,438,12566
2020-03-23,170,1799231,1166,11917
2020-03-24,132,1791527,1294,12057
2020-03-25,329,1930647,185,12077
2020-03-26,704,2022451,2052,18527
2020-03-27,392,2191033,774,164892
2020-03-28,342,1760149,0,148701


In [321]:
# Export the last 8 days as a Remarkup table.
# This used to read the IPython output cache (`_320`), which only exists in the kernel
# session where cell 320 was run and breaks on Restart & Run All; recompute from
# `daily_events` instead. The first column level (the unnamed count column) is dropped so
# that the header shows only the client types.
daily_events.tail(8).droplevel(0, axis=1).reset_index().pipe(df_to_remarkup)

| date | android-web | ios-web | kaios-app | kaios-web
| ----- | ----- | ----- | ----- | ----- 
| 2020-03-21 | 187 | 1923306 | 655 | 10025
| 2020-03-22 | 373 | 2137102 | 438 | 12566
| 2020-03-23 | 170 | 1799231 | 1166 | 11917
| 2020-03-24 | 132 | 1791527 | 1294 | 12057
| 2020-03-25 | 329 | 1930647 | 185 | 12077
| 2020-03-26 | 704 | 2022451 | 2052 | 18527
| 2020-03-27 | 392 | 2191033 | 774 | 164892
| 2020-03-28 | 342 | 1760149 | 0 | 148701



In [210]:
# Daily number of rows in event.eventerror whose schema is "InukaPageView",
# restricted to 2020 from February onwards.
# `date` is assembled as a YYYY-MM-DD string from the year/month/day columns
# (month and day left-padded to two digits).
daily_errors_r = wmf.spark.run("""
SELECT
    CONCAT_WS("-", year, LPAD(month, 2, "0"), LPAD(day, 2, "0")) AS date,
    COUNT(1) as errors
FROM event.eventerror
WHERE
  event.schema = "InukaPageView" AND
  year = 2020 AND
  month >= 2
GROUP BY
  year,
  month,
  day
""")

In [212]:
# Index the error counts by calendar day, oldest first, and show the latest rows.
daily_errors = (
  daily_errors_r
  .assign(date=pd.to_datetime(daily_errors_r["date"]))
  .sort_values(by="date")
  .set_index("date")
)

daily_errors.tail(5)

Unnamed: 0_level_0,errors
date,Unnamed: 1_level_1
2020-03-13,642
2020-03-16,959
2020-03-17,140
2020-03-18,1033
2020-03-19,7


# KaiOS web events

In [305]:
# Raw InukaPageView rows with client_type "kaios-web" for a single day (2020-03-26).
# The two LATERAL VIEW INLINE clauses expand the `event` and `useragent` structs so that
# `ev.*` / `ua.*` return their fields as individual columns.
kaios_sample_r = wmf.spark.run("""
SELECT
  wiki,
  dt,
  webhost,
  geocoded_data,
  ev.*,
  ua.*
FROM event.inukapageview ipv
LATERAL VIEW INLINE(ARRAY(event)) ev
LATERAL VIEW INLINE(ARRAY(useragent)) ua
WHERE
  event.client_type = "kaios-web" AND
  year = 2020 AND 
  month = 3 AND
  day = 26
""")

In [308]:
# Number of sampled KaiOS web events per user-agent OS family.
(
  kaios_sample_r
  .loc[:, "os_family"]
  .value_counts()
)

Firefox OS    1302
Name: os_family, dtype: int64

In [114]:
# Raw InukaPageView rows with client_type "android-web" for 2020-03-14 through the end of March.
# The two LATERAL VIEW INLINE clauses expand the `event` and `useragent` structs so that
# `ev.*` / `ua.*` return their fields as individual columns.
android_sample_r = wmf.spark.run("""
SELECT
  wiki,
  dt,
  webhost,
  geocoded_data,
  ev.*,
  ua.*
FROM event.inukapageview ipv
LATERAL VIEW INLINE(ARRAY(event)) ev
LATERAL VIEW INLINE(ARRAY(useragent)) ua
WHERE
  event.client_type = "android-web" AND
  year = 2020 AND 
  month = 3 AND
  day >= 14
""")

In [None]:
# Preview the first rows only; rendering the whole frame bloats the notebook and can expose
# per-user fields (user_id, session_id, geocoded_data) in the saved output.
android_sample_r.head()

In [118]:
# Print the column names, non-null counts and dtypes of the Android sample.
android_sample_r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 851 entries, 0 to 850
Data columns (total 28 columns):
wiki                    851 non-null object
dt                      851 non-null object
webhost                 851 non-null object
geocoded_data           851 non-null object
user_id                 851 non-null object
session_id              851 non-null object
pageview_token          851 non-null object
client_type             851 non-null object
referring_domain        0 non-null object
load_dt                 851 non-null object
page_open_time          851 non-null int64
page_visible_time       851 non-null int64
section_count           851 non-null int64
opened_section_count    851 non-null int64
page_namespace          851 non-null int64
is_main_page            851 non-null bool
is_search_page          851 non-null bool
app_version             0 non-null object
browser_family          851 non-null object
browser_major           851 non-null object
browser_minor           851 

In [119]:
# Number of sampled Android web events per user-agent browser family.
(
  android_sample_r
  .loc[:, "browser_family"]
  .value_counts()
)

UC Browser               627
Chrome Mobile WebView     79
Firefox Mobile            66
MiuiBrowser               61
Chrome                    18
Name: browser_family, dtype: int64

In [117]:
# Number of sampled Android web events per user-agent device family.
(
  android_sample_r
  .loc[:, "device_family"]
  .value_counts()
)

vivo 1718                  323
XT1078                     148
Generic Smartphone         144
XiaoMi Redmi 4              99
XiaoMi Redmi Note 7         29
XiaoMi Mi A1                17
Huawei JSN-L42              12
wv                           8
VCE-AL00                     8
Asus X01BDA                  6
vivo 1951                    6
Other                        6
XiaoMi Redmi Note 5          6
ALP-AL00                     5
XiaoMi Redmi Note 8 Pro      5
A6020a40                     4
XiaoMi Redmi Note 4          4
XiaoMi Redmi Y2              3
XiaoMi Redmi 4A              3
YU5014                       3
XiaoMi Redmi Note 7 Pro      2
vivo 1901                    2
XiaoMi Redmi K20 Pro         2
Lenovo K10a40                2
XiaoMi Redmi Note 7S         2
XiaoMi MI 9                  2
Name: device_family, dtype: int64

In [141]:
# Raw InukaPageView rows with client_type "kaios-web" for 2020-03-14 through the end of March.
# The two LATERAL VIEW INLINE clauses expand the `event` and `useragent` structs so that
# `ev.*` / `ua.*` return their fields as individual columns.
# NOTE(review): this rebinds `kaios_sample_r`, which an earlier cell in this section assigns
# from the same query with `day = 26`; cells that read the name see whichever query ran
# last. Consider giving the two samples distinct names.
kaios_sample_r = wmf.spark.run("""
SELECT
  wiki,
  dt,
  webhost,
  geocoded_data,
  ev.*,
  ua.*
FROM event.inukapageview ipv
LATERAL VIEW INLINE(ARRAY(event)) ev
LATERAL VIEW INLINE(ARRAY(useragent)) ua
WHERE
  event.client_type = "kaios-web" AND
  year = 2020 AND 
  month = 3 AND
  day >= 14
""")

In [None]:
# Number of sampled KaiOS web events per "country" entry of geocoded_data.
(
  kaios_sample_r["geocoded_data"]
  .map(lambda geo: geo["country"])
  .value_counts()
)

In [None]:
# Number of sampled Android web events per "country" entry of geocoded_data.
(
  android_sample_r["geocoded_data"]
  .map(lambda geo: geo["country"])
  .value_counts()
)

# App pageviews

# KaiOS app events