In [1]:
from dateutil.relativedelta import relativedelta
from numbers import Number
import pandas as pd

import wmfdata as wmf
from wmfdata.utils import pct_str, sig_figs

In [3]:
FILE = "metrics/metrics.tsv"
metrics = pd.read_csv(FILE, sep="\t", parse_dates=["month"]).set_index("month")

FILE_D =  "metrics/diversity_metrics.tsv"
diversity_metrics = pd.read_csv(FILE_D, sep="\t", parse_dates=["month"]).set_index("month")

graph_end = metrics.index[-1]
graph_start = graph_end - relativedelta(months = 47)
prior_month = graph_end- relativedelta(months = 1)

# Data transformation

In [4]:
# Transform content flows into content stocks
def stock_to_flow(arr):
    return arr[-1] - arr[0]

flow_cols = [
    "net_new_content_pages", 
    "net_new_Wikipedia_articles", 
    "net_new_Commons_content_pages", 
    "net_new_Wikidata_entities"
]
stock_cols = [
    "content_pages", 
    "Wikipedia_articles", 
    "Commons_content_pages",
    "Wikidata_entities"
]

name_map = {flow_cols[i]: stock_cols[i] for i in range(len(stock_cols))}

stock_metrics = metrics[flow_cols].cumsum().rename(name_map, axis=1)

metrics = pd.concat([metrics, stock_metrics], axis=1)

In [5]:
metrics_all = pd.concat([metrics, diversity_metrics], axis=1)
metrics_all  = metrics_all.loc[:,~metrics_all.columns.duplicated()]

# Report

In [6]:
report_order = [
     #Editors
    'active_editors',
        'new_active_editors',
        'returning_active_editors',
    'new_editor_retention',
    # Content
    'content_pages', 
        'Wikipedia_articles',
        'Commons_content_pages', 
        'Wikidata_entities',
    'net_new_content_pages',
        'net_new_Wikipedia_articles',
        'net_new_Commons_content_pages',
        'net_new_Wikidata_entities',
    'revert_rate',
    'total_edits',
        'mobile_edits',
        'wikidata_edits',
        'uploads',
        'other_nonbot_edits',
        'anonymous_edits',
        'non_anonymous_edits',
    #Diversity Content
    'global_north_net_new_content',
    'global_south_net_new_content',
    'global_north_edits',
    'global_south_edits',
    #Diversity Editors
    'global_north_active_editors',
    'global_south_active_editors',
    #'global_north_new_editor_retention',
    #'global_south_new_editor_retention'

]

In [7]:
def fmt_num(x):
    if isinstance(x, Number) and not pd.isnull(x):
        x = sig_figs(x, 3)
        
        if x < 5:
            return pct_str(x)
        else:
            return "{:,.0f}".format(x)
    else:
        return x

def calc_rpt(ser):   
    cur = ser[-1]
    
    try:
        yr_prev = ser[-13]
        yoy_value = yr_prev 
        yoy_change = (cur / yr_prev) - 1
    except IndexError:
        yoy_change = None
        
    try:
        yoyoyoy_value = ser[-37]
        cagr_change = (cur  / yoyoyoy_value)**(1/4) - 1
        yoyoyoy = (cur / yoyoyoy_value) - 1
    except IndexError:
        cagr = None
        
    
    res = [cur, yoy_change, yoy_value, yoyoyoy, yoyoyoy_value, cagr_change]
    return pd.Series(
        [fmt_num(n) for n in res],
        index=["value", "yoy_change", "yoy_value", "yoyoyoy", "yoyoyoy_value", "3_yr_cagr"]
    )



#TODO Fix lines below, ensure the logic mirrors the logic updated in calc_rpt
def calc_prev_month_rpt(ser):   
    pm_cur = ser[-2]
    
    try:
        pm_yr_prev = ser[-14]
        pm_yoy_change = (pm_cur / pm_yr_prev) - 1
    except IndexError:
        pm_yoy_change = None
        
    try:
        pm_cagr = (ser[-2] / ser[-49])**(1/4) - 1
    except IndexError:
        pm_cagr = None
        
    
    pm_res = [pm_cur, pm_yoy_change, pm_cagr]
    return pd.Series(
        [fmt_num(n) for n in pm_res],
        index=["previous_M_value", "PM_yoy_change", "PM4_yr_cagr"]
    )

In [8]:
def add_month_header(df):
    header = graph_end.strftime("%Y-%m") + " editing and content metrics"
    new_index = pd.MultiIndex.from_product([[header], df.columns])
    
    df.columns = new_index
    
    return df

(
    metrics_all
    .apply(calc_rpt)
    .transpose()
    .reindex(report_order)
    .pipe(add_month_header)
    .style
    .set_table_styles([{
        'selector': 'th.col_heading.level0',
        'props': 'font-size: 1.5em; text-align: center; font-weight: bold;'
    }])
)

Unnamed: 0_level_0,2023-07 editing and content metrics,2023-07 editing and content metrics,2023-07 editing and content metrics,2023-07 editing and content metrics,2023-07 editing and content metrics,2023-07 editing and content metrics
Unnamed: 0_level_1,value,yoy_change,yoy_value,yoyoyoy,yoyoyoy_value,3_yr_cagr
active_editors,87900,-0.4%,88200,0.2%,87700,0.1%
new_active_editors,15700,-1.4%,15900,-16.0%,18700,-4.3%
returning_active_editors,72200,-0.1%,72300,4.6%,69000,1.1%
new_editor_retention,5.8%,-6.0%,6.2%,5.6%,5.5%,1.4%
content_pages,331000000,8.2%,306000000,28.6%,258000000,6.5%
Wikipedia_articles,63800000,4.2%,61300000,16.4%,54900000,3.9%
Commons_content_pages,97000000,12.2%,86400000,52.2%,63700000,11.1%
Wikidata_entities,108000000,7.1%,101000000,20.9%,89400000,4.9%
net_new_content_pages,2730000,31.1%,2080000,19.6%,2280000,4.6%
net_new_Wikipedia_articles,157000,-25.1%,210000,-59.4%,387000,-20.2%


In [9]:
print("Previous month's report")
(
    metrics_all
    .apply(calc_prev_month_rpt)
    .transpose()
    .reindex(report_order)
    .fillna("—")
)

Previous month's report


Unnamed: 0,previous_M_value,PM_yoy_change,PM4_yr_cagr
active_editors,88000,-0.4%,1.6%
new_active_editors,16200,-2.8%,1.2%
returning_active_editors,71800,0.2%,1.7%
new_editor_retention,6.2%,-9.3%,5.7%
content_pages,329000000,8.1%,11.6%
Wikipedia_articles,63700000,4.2%,5.7%
Commons_content_pages,95700000,12.2%,14.6%
Wikidata_entities,107000000,6.7%,16.0%
net_new_content_pages,2880000,33.8%,6.8%
net_new_Wikipedia_articles,160000,-22.1%,-8.7%
