In [196]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
import datetime as dt

In [2]:
import wmfdata as wmf

You can find the source for `wmfdata` at https://github.com/neilpquinn/wmfdata


In [3]:
# Relative path of the tab-separated metrics table that the loading cell reads
FILE = "metrics/metrics.tsv"

# Data loading and transformation

In [66]:
# Load the monthly metrics table and key it by month
metrics = pd.read_csv(FILE, sep="\t", parse_dates=["month"])
metrics = metrics.set_index("month")

# Existing active editors = all active editors minus the new and second-month
# groups; the derived column goes directly after "active_editors"
eae_vals = (
    metrics["active_editors"]
    .sub(metrics["new_active_editors"])
    .sub(metrics["second_month_active_editors"])
)
ae_idx = metrics.columns.get_loc("active_editors")
metrics.insert(ae_idx + 1, "existing_active_editors", eae_vals)

# Total mobile edits = web + app; placed where "mobile_web_edits" currently sits
me_vals = metrics["mobile_web_edits"].add(metrics["mobile_app_edits"])
mwe_idx = metrics.columns.get_loc("mobile_web_edits")
metrics.insert(mwe_idx, "mobile_edits", me_vals)

# Only the combined mobile figure is a key metric, so remove its two components
metrics = metrics.drop(columns=["mobile_web_edits", "mobile_app_edits"])

metrics.tail()

Unnamed: 0_level_0,active_editors,existing_active_editors,new_active_editors,second_month_active_editors,total_edits,mobile_edits,nonbot_edits,new_editor_retention,uploads,data_edits,total_content,wikipedia_articles,files,wikidata_entities
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2017-12-01,79558,61019.0,14387.0,4152.0,37358877.0,1021714.0,19926673.0,0.052366,522009,13933627,178512868.0,46968190.0,47518748.0,43888840.0
2018-01-01,84422,64391.0,16678.0,3353.0,39437710.0,1153243.0,20374752.0,0.07736,675791,9749202,180301255.0,47369882.0,48199383.0,44334692.0
2018-02-01,78963,59777.0,15052.0,4134.0,39574449.0,1011393.0,19934503.0,0.066922,725015,15704642,184123520.0,47533734.0,48939378.0,47026490.0
2018-03-01,86190,64218.0,17625.0,4347.0,43003671.0,1121184.0,25176956.0,,809162,17996980,,,,
2018-04-01,83705,62907.0,16059.0,4739.0,34530321.0,1107284.0,20762284.0,,634360,12851269,,,,


In [212]:
def fmt_num(n):
    """Format a number for the summary table; pass anything else through.

    Numbers with a magnitude below 5 are treated as proportions (a rate or a
    relative change) and rendered as percentages via `pct`. All other numbers
    are rounded to three significant figures and rendered with thousands
    separators and no decimals. Non-numeric values (for example dates or
    strings) and booleans are returned unchanged.

    Parameters
    ----------
    n : object
        The value to format. Python `int`/`float` and any NumPy integer or
        floating scalar are treated as numbers.

    Returns
    -------
    str or object
        The formatted string for numbers, otherwise `n` itself.
    """
    # bool is a subclass of int, so exclude it explicitly to keep flags intact
    if isinstance(n, (bool, np.bool_)):
        return n
    # Accept every integer/floating scalar type rather than only the two
    # 64-bit NumPy types, so e.g. np.int32 or plain floats are formatted too
    if not isinstance(n, (int, float, np.integer, np.floating)):
        return n

    # Compare on magnitude: a bare `n < 5` would send every negative number,
    # however large, down the percentage path
    if abs(n) < 5:
        return pct(n)

    # Round to 3 significant figures first, then print in plain (non-scientific)
    # notation with thousands separators
    return "{:,.0f}".format(float("{:.3g}".format(n)))

def pct(n):
    """Render the proportion `n` as a percentage string with one decimal place."""
    return format(n, ".1%")

def calc_rpt(ser):
    """Summarise the most recent non-null observation of one metric.

    Parameters
    ----------
    ser : pandas.Series
        Metric values in chronological order. The index entries must provide
        a `.date()` method (e.g. pandas Timestamps).

    Returns
    -------
    pandas.Series
        Four entries labelled "latest_month", "value", "mom_change" and
        "yoy_change". "value" is formatted with `fmt_num`; the two changes are
        formatted as percentages with `pct`, or left as NaN when there are not
        enough non-null observations to compute them. If the series has no
        non-null observations at all, every entry is None.
    """
    labels = ["latest_month", "value", "mom_change", "yoy_change"]
    nn_ser = ser.dropna()

    if nn_ser.empty:
        # Nothing to report; keep the labels so DataFrame.apply can still align
        return pd.Series([None] * len(labels), index=labels, dtype=object)

    def nth_latest(k):
        # Explicitly positional lookup from the end (integer keys passed to
        # Series[...] are not reliably positional on a non-integer index);
        # NaN when the history is shorter than k observations
        return nn_ser.iloc[-k] if len(nn_ser) >= k else np.nan

    def fmt_change(change):
        # Always show a relative change as a percentage, regardless of its
        # size; leave a missing change as NaN rather than printing "nan%"
        return change if pd.isnull(change) else pct(change)

    cur_mo = nn_ser.index[-1].date()
    cur = nth_latest(1)

    # NOTE: positions stand in for calendar months here, so "13th from the end"
    # is the same month last year only if the non-null rows are consecutive
    # months with no gaps
    mom_change = (cur / nth_latest(2)) - 1
    yoy_change = (cur / nth_latest(13)) - 1

    return pd.Series(
        [cur_mo, fmt_num(cur), fmt_change(mom_change), fmt_change(yoy_change)],
        index=labels
    )

In [213]:
# One report row per metric: summarise each column, then flip metrics onto the rows
metrics.apply(calc_rpt).T

Unnamed: 0,latest_month,value,mom_change,yoy_change
active_editors,2018-04-01,83700,-2.9%,1.0%
existing_active_editors,2018-04-01,62900,-2.0%,2.8%
new_active_editors,2018-04-01,16100,-8.9%,-5.0%
second_month_active_editors,2018-04-01,4740,9.0%,0.3%
total_edits,2018-04-01,34500000,-19.7%,9.5%
mobile_edits,2018-04-01,1110000,-1.2%,30.1%
nonbot_edits,2018-04-01,20800000,-17.5%,19.6%
new_editor_retention,2018-02-01,6.7%,-13.5%,11.2%
uploads,2018-04-01,634000,-21.6%,1.7%
data_edits,2018-04-01,12900000,-28.6%,47.1%
