This notebook can be used to backfill new or changed metrics across all history (unlike `03-report.ipynb` which runs all metrics for a single month). To do a new backfill, overwrite this code, run your backfill, and then commit the notebook to Git (so it can be overwritten for the next backfill).

In [115]:
import pandas as pd
from wmfdata import hive

In [116]:
FILENAME = "metrics/metrics.tsv"

# old metric column names (to be removed)
OLD_COLUMNS = [
    "total_edits",
    "uploads",
    "mobile_edits",
    "data_edits",
    "nonbot_nondata_nonupload_edits",
    "revert_rate"
]

MEDIAWIKI_HISTORY_SNAPSHOT = "2019-04"


In [117]:
old_metrics = (
    pd.read_csv(FILENAME, sep="\t", parse_dates = ["month"])
    .set_index("month")
)

old_metrics[OLD_COLUMNS].tail()

Unnamed: 0_level_0,total_edits,uploads,mobile_edits,data_edits,nonbot_nondata_nonupload_edits,revert_rate
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-11-01,44349706.0,592679.0,1228780.0,23908469.0,13563561.0,0.071465
2018-12-01,41966740.0,551296.0,1260692.0,21426310.0,13490420.0,0.065287
2019-01-01,46294961.0,587164.0,1416465.0,23223444.0,14917046.0,0.060343
2019-02-01,44131944.0,534391.0,,22674118.0,13415954.0,0.06014
2019-03-01,49431409.0,547740.0,,28497214.0,14459657.0,0.0526


In [119]:
old_metrics = old_metrics.drop(OLD_COLUMNS, axis=1)

In [121]:
new_metrics = hive.run("""
select
    date_format(event_timestamp, "yyyy-MM-01") as month,
    count(*) as total_edits,
    sum(cast(upload as int)) as uploads,
    sum(cast(mobile_edit as int)) as mobile_edits,
    sum(cast(data_edit as int)) as wikidata_edits,
    sum(cast(nonbot_edit and not data_edit and not upload and not mobile_edit as int)) as other_nonbot_edits,
    sum(cast(reverted as int)) / sum(cast(nonbot_edit as int)) as revert_rate
from (
    select
        event_timestamp,
        (
            array_contains(revision_tags, "mobile edit") or
            array_contains(revision_tags, "mobile app edit") or 
            array_contains(revision_tags, "mobile web edit")
        ) as mobile_edit,
        size(event_user_is_bot_by) = 0 and size(event_user_is_bot_by_historical) = 0 as nonbot_edit,
        (wiki_db = "wikidatawiki" and page_namespace_historical in (0, 120)) as data_edit,
        revision_is_identity_reverted as reverted,
        (revision_parent_id = 0 and page_namespace_historical = 6) as upload
    from wmf.mediawiki_history
    where
        event_timestamp between "2001-01-01" and "2019-04-01" and
        event_entity = "revision" and
        event_type = "create" and
        snapshot = "{mediawiki_history_snapshot}"
) edits
group by date_format(event_timestamp, "yyyy-MM-01")
""".format(mediawiki_history_snapshot=MEDIAWIKI_HISTORY_SNAPSHOT)
)

In [122]:
new_metrics = (
    new_metrics
    .sort_values("month")
    .assign(month=lambda df: pd.to_datetime(df["month"]))
    .set_index("month")
)

In [123]:
new_metrics.head()

Unnamed: 0_level_0,total_edits,uploads,mobile_edits,wikidata_edits,other_nonbot_edits,revert_rate
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-01-01,13783,0,0,0,225,0.01476
2001-02-01,2178,0,0,0,600,0.001362
2001-03-01,4078,0,0,0,1018,0.000818
2001-04-01,2735,0,0,0,759,0.003322
2001-05-01,5955,0,0,0,1698,0.001532


In [124]:
new_metrics.tail()

Unnamed: 0_level_0,total_edits,uploads,mobile_edits,wikidata_edits,other_nonbot_edits,revert_rate
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-11-01,44376013,586129,1228949,23896534,11038500,0.093747
2018-12-01,41988394,546299,1260737,21418466,10991655,0.085478
2019-01-01,46312638,580719,1416777,23213770,12143464,0.081303
2019-02-01,44135189,531456,1254935,22667003,10952758,0.073758
2019-03-01,49434627,542547,1379133,28490299,11748062,0.06309


In [127]:
metrics = old_metrics.merge(new_metrics, left_index=True, right_index=True)

In [129]:
pd_display_all(metrics["2017":])

Unnamed: 0_level_0,active_editors,existing_active_editors,global_south_active_editors,global_south_edits,global_south_nonbot_edits,mobile-heavy_wiki_active_editors,mobile-heavy_wiki_edits,mobile-heavy_wiki_new_editor_retention,mobile-heavy_wiki_nonbot_edits,net_new_Commons_content_pages,net_new_Wikidata_entities,net_new_Wikipedia_articles,net_new_content_pages,new_active_editors,new_editor_retention,second_month_active_editors,total_edits,uploads,mobile_edits,wikidata_edits,other_nonbot_edits,revert_rate
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2017-01-01,85498.0,62293.0,,,,3300.0,894436.0,0.045183,454376.0,793270.0,436983.0,412831.0,1899491.0,18417.0,0.051108,3737.0,38001265,832992,900732,17069546,12042844,0.105871
2017-02-01,80421.0,58230.0,,,,3347.0,1093680.0,0.040924,418319.0,793644.0,206788.0,387343.0,1627275.0,16680.0,0.053877,4510.0,38057326,843774,813061,17940961,11105361,0.114371
2017-03-01,87486.0,62324.0,,,,3449.0,969064.0,0.047269,431418.0,674367.0,183115.0,308826.0,1386866.0,19594.0,0.071227,4563.0,33612901,710482,929213,12928116,11894294,0.113531
2017-04-01,82855.0,60268.0,,,,3383.0,911166.0,0.042733,415346.0,595207.0,537677.0,382293.0,1724616.0,16913.0,0.060161,4726.0,31602306,624298,851164,8723317,11099327,0.111393
2017-05-01,86186.0,61146.0,,,,3421.0,729458.0,0.046426,390945.0,833390.0,406552.0,424211.0,1977121.0,19636.0,0.060064,4441.0,41867781,862373,854016,12451250,11024825,0.107609
2017-06-01,81664.0,57870.0,,,,3589.0,738181.0,0.04323,419822.0,868011.0,1049593.0,433003.0,2736800.0,18770.0,0.054192,4112.0,38336583,898029,855413,17156953,10972893,0.098326
2017-07-01,80149.0,58758.0,,,,4107.0,944735.0,0.06143,455920.0,668452.0,2655035.0,329845.0,4035895.0,16379.0,0.049984,4055.0,39740681,702732,934109,19022591,11234467,0.091339
2017-08-01,79186.0,59160.0,,,,3458.0,1039919.0,0.052176,436618.0,456573.0,3874032.0,406120.0,5097291.0,15317.0,0.050048,3779.0,44526564,488750,1007777,19872017,11205655,0.099401
2017-09-01,79610.0,58173.0,,,,3234.0,1242240.0,0.049343,409461.0,719268.0,3143746.0,392927.0,4448320.0,16910.0,0.053856,3636.0,41774909,758462,923963,20179635,10554861,0.101785
2017-10-01,81073.0,59620.0,,,,3381.0,1045401.0,0.042514,408665.0,571720.0,941342.0,185950.0,1886914.0,16516.0,0.058635,3998.0,40886220,611227,999360,16790134,11736089,0.103107


In [130]:
metrics.to_csv(FILENAME, sep="\t")