## Visualization

In [None]:
# Evaluate `expr` while muffling package startup messages, warnings and
# ordinary messages; the value of `expr` is returned.
shhh <- function(expr) {
  suppressPackageStartupMessages(
    suppressWarnings(
      suppressMessages(expr)
    )
  )
}
# Attach the packages used by this notebook without startup chatter.
# Attachment order is kept as-is because it determines which package masks
# which; glue only needs to be attached once.
shhh({
  library(magrittr)
  library(zeallot)
  library(glue)
  library(tidyverse)
  library(lubridate)
  library(scales)
  library(reshape2)
  library(RColorBrewer)
})

### Editors

In [None]:
# Read the tab-separated metrics table and parse its `month` column as Date.
file <- "metrics/metrics.tsv"
contributor <- read.delim(file)
contributor[["month"]] <- as.Date(contributor[["month"]], format = "%Y-%m-%d")

In [None]:
# Show the last six rows of the metrics table.
tail(contributor, n = 6L)

In [None]:
# Editor metrics from July 2019 onward, tagged with:
#   fiscal_year - label of the July-June fiscal year the month falls in
#   MonthN      - two-digit month as a factor ordered July..June, so that
#                 fiscal years can be overlaid on one x axis
#   Month       - abbreviated month name used for axis labels
# The "FY 2018/19" branch is unreachable with the current filter; it is kept
# (with the correct label) so that widening the filter does not mislabel rows.
editors_fy <- contributor %>%
  select(month, active_editors, new_active_editors, returning_active_editors) %>%
  filter(month >= as.Date("2019-07-01")) %>%
  mutate(
    fiscal_year = case_when(
      month < as.Date("2019-07-01") ~ "FY 2018/19",
      month < as.Date("2020-07-01") ~ "FY 2019/20",
      month < as.Date("2021-07-01") ~ "FY 2020/21",
      # Everything from July 2021 on is labelled FY 2021/22, as before.
      month >= as.Date("2021-07-01") ~ "FY 2021/22"
    ),
    MonthN = factor(
      format(as.Date(month), "%m"),
      levels = c("07", "08", "09", "10", "11", "12",
                 "01", "02", "03", "04", "05", "06")
    ),
    Month = months(as.Date(month), abbreviate = TRUE)
  )

In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# Monthly active editors: one line per fiscal year on a shared July-June
# axis, with the FY 2021/22 months additionally marked by points.
p1 <- ggplot(
  editors_fy,
  aes(
    x = MonthN,
    y = active_editors,
    group = fiscal_year,
    colour = fiscal_year,
    size = fiscal_year,
    linetype = fiscal_year
  )
) +
  geom_line() +
  # Position and grouping are inherited from the plot-level mapping; point
  # size and colour are fixed constants.
  geom_point(
    data = filter(editors_fy, fiscal_year == "FY 2021/22"),
    size = 5,
    colour = "#2a4b8d"
  ) +
  scale_x_discrete(breaks = editors_fy$MonthN, labels = editors_fy$Month) +
  scale_y_continuous(
    name = "Active Editors",
    labels = polloi::compress,
    limits = c(75E3, 105E3)
  ) +
  scale_colour_manual(
    values = c(
      "FY 2019/20" = "#b1c4eb",
      "FY 2020/21" = "#3366cc",
      "FY 2021/22" = "#2a4b8d"
    )
  ) +
  scale_linetype_manual(values = c("twodash", "dashed", "solid")) +
  scale_size_manual(values = c(1.2, 1.3, 2.2)) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 16, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 22, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly Active Editors")

p1

In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 4)

# Monthly new active editors: one line per fiscal year on a shared July-June
# axis, with the FY 2021/22 months additionally marked by points.
p2 <- ggplot(
  editors_fy,
  aes(
    x = MonthN,
    y = new_active_editors,
    group = fiscal_year,
    colour = fiscal_year,
    size = fiscal_year,
    linetype = fiscal_year
  )
) +
  geom_line() +
  # Position and grouping are inherited from the plot-level mapping; point
  # size and colour are fixed constants.
  geom_point(
    data = filter(editors_fy, fiscal_year == "FY 2021/22"),
    size = 4,
    colour = "#b32424"
  ) +
  scale_x_discrete(
    name = "Month",
    breaks = editors_fy$MonthN,
    labels = editors_fy$Month
  ) +
  scale_y_continuous(
    name = "New Active Editors",
    labels = polloi::compress,
    limits = c(10E3, 28E3)
  ) +
  scale_colour_manual(
    values = c(
      "FY 2019/20" = "#ec8ba7",
      "FY 2020/21" = "#dd3333",
      "FY 2021/22" = "#b32424"
    )
  ) +
  scale_linetype_manual(values = c("twodash", "dashed", "solid")) +
  scale_size_manual(values = c(1, 1.1, 1.3, 2)) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 14, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 16, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 22, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly New Active Editors")

p2

In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 4)

# Monthly returning active editors: one line per fiscal year on a shared
# July-June axis, with the FY 2021/22 months additionally marked by points.
p3 <- ggplot(
  editors_fy,
  aes(
    x = MonthN,
    y = returning_active_editors,
    group = fiscal_year,
    colour = fiscal_year,
    size = fiscal_year,
    linetype = fiscal_year
  )
) +
  geom_line() +
  # Position and grouping are inherited from the plot-level mapping; point
  # size and colour are fixed constants.
  geom_point(
    data = filter(editors_fy, fiscal_year == "FY 2021/22"),
    size = 4,
    colour = "#b32424"
  ) +
  scale_x_discrete(
    name = "Month",
    breaks = editors_fy$MonthN,
    labels = editors_fy$Month
  ) +
  scale_y_continuous(
    name = "Returning Active Editors",
    labels = polloi::compress,
    limits = c(60E3, 80E3)
  ) +
  scale_colour_manual(
    values = c(
      "FY 2019/20" = "#ec8ba7",
      "FY 2020/21" = "#dd3333",
      "FY 2021/22" = "#b32424"
    )
  ) +
  scale_linetype_manual(values = c("twodash", "dashed", "solid")) +
  scale_size_manual(values = c(1.1, 1.1, 1.3, 2)) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 14, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 16, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 22, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly Returning Active Editors")

p3

### Net new Content

In [None]:
# Net-new-content metrics from July 2019 onward, plus:
#   without_wikidata - net new content pages minus net new Wikidata entities
#   fiscal_year      - label of the July-June fiscal year the month falls in
#   MonthN           - two-digit month as a factor ordered July..June
#   Month            - abbreviated month name used for axis labels
content_fy <- contributor %>%
  select(
    month,
    net_new_content_pages,
    net_new_Commons_content_pages,
    net_new_Wikidata_entities,
    net_new_Wikipedia_articles
  ) %>%
  mutate(without_wikidata = net_new_content_pages - net_new_Wikidata_entities) %>%
  # Alternative start date kept from the original: filter(month >= '2018-07-01')
  filter(month >= as.Date("2019-07-01")) %>%
  mutate(
    fiscal_year = case_when(
      month < as.Date("2019-07-01") ~ "FY 2018/19",
      month < as.Date("2020-07-01") ~ "FY 2019/20",
      month < as.Date("2021-07-01") ~ "FY 2020/21",
      month >= as.Date("2021-07-01") ~ "FY 2021/22"
    ),
    MonthN = factor(
      format(as.Date(month), "%m"),
      levels = c("07", "08", "09", "10", "11", "12",
                 "01", "02", "03", "04", "05", "06")
    ),
    Month = months(as.Date(month), abbreviate = TRUE)
  )

In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# Monthly net new content excluding Wikidata: one line per fiscal year on a
# shared July-June axis, with the FY 2021/22 months marked by points.
p1 <- ggplot(
  content_fy,
  aes(
    x = MonthN,
    y = without_wikidata,
    group = fiscal_year,
    colour = fiscal_year,
    size = fiscal_year,
    linetype = fiscal_year
  )
) +
  geom_line() +
  # Position and grouping are inherited from the plot-level mapping; point
  # size and colour are fixed constants.
  geom_point(
    data = filter(content_fy, fiscal_year == "FY 2021/22"),
    size = 4,
    colour = "#14866d"
  ) +
  scale_x_discrete(breaks = content_fy$MonthN, labels = content_fy$Month) +
  scale_y_continuous(
    name = "Monthly Net New Content (non-wikidata)",
    labels = polloi::compress,
    limits = c(75E4, 300E4)
  ) +
  scale_colour_manual(
    values = c(
      "FY 2019/20" = "#00fcc5",
      "FY 2020/21" = "#00af89",
      "FY 2021/22" = "#14866d"
    )
  ) +
  scale_linetype_manual(values = c("twodash", "dashed", "solid")) +
  scale_size_manual(values = c(1.1, 1.1, 1.3, 2)) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 18, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 24, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly Net New Content (non-wikidata)")

p1

In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# Monthly net new content pages: one line per fiscal year on a shared
# July-June axis, with the FY 2021/22 months marked by points.
p1 <- ggplot(
  content_fy,
  aes(
    x = MonthN,
    y = net_new_content_pages,
    group = fiscal_year,
    colour = fiscal_year,
    size = fiscal_year,
    linetype = fiscal_year
  )
) +
  geom_line() +
  # Position and grouping are inherited from the plot-level mapping; point
  # size and colour are fixed constants.
  geom_point(
    data = filter(content_fy, fiscal_year == "FY 2021/22"),
    size = 4,
    colour = "#14866d"
  ) +
  # Only one size scale is added: a second scale for the same aesthetic
  # replaces the first and makes ggplot2 emit a "scale already present"
  # message. These are the values that were effectively in use.
  scale_size_manual(values = c(1, 1.2, 1.3, 2.2)) +
  scale_colour_manual(
    values = c(
      "FY 2019/20" = "#00fcc5",
      "FY 2020/21" = "#00af89",
      "FY 2021/22" = "#14866d"
    )
  ) +
  scale_linetype_manual(values = c("twodash", "dashed", "solid")) +
  scale_x_discrete(breaks = content_fy$MonthN, labels = content_fy$Month) +
  scale_y_continuous(name = "Monthly Net New Content", labels = polloi::compress) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 18, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 24, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly Net New Content")

p1

In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# Monthly net new Wikipedia articles: one line per fiscal year on a shared
# July-June axis, with the FY 2021/22 months marked by points.
p1 <- ggplot(
  content_fy,
  aes(
    x = MonthN,
    y = net_new_Wikipedia_articles,
    group = fiscal_year,
    colour = fiscal_year,
    size = fiscal_year,
    linetype = fiscal_year
  )
) +
  geom_line() +
  # Position and grouping are inherited from the plot-level mapping; point
  # size and colour are fixed constants.
  geom_point(
    data = filter(content_fy, fiscal_year == "FY 2021/22"),
    size = 4,
    colour = "#14866d"
  ) +
  # Only one size scale is added: a second scale for the same aesthetic
  # replaces the first and makes ggplot2 emit a "scale already present"
  # message. These are the values that were effectively in use.
  scale_size_manual(values = c(1, 1.2, 1.3, 2.2)) +
  scale_colour_manual(
    values = c(
      "FY 2019/20" = "#00fcc5",
      "FY 2020/21" = "#00af89",
      "FY 2021/22" = "#14866d"
    )
  ) +
  scale_linetype_manual(values = c("twodash", "dashed", "solid")) +
  scale_x_discrete(breaks = content_fy$MonthN, labels = content_fy$Month) +
  scale_y_continuous(
    name = "Monthly Net Wikipedia Articles",
    labels = polloi::compress
  ) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 18, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 24, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly Net Wikipedia Articles")

p1


In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# Monthly net new Commons content pages: one line per fiscal year on a shared
# July-June axis, with the FY 2021/22 months marked by points.
p1 <- ggplot(
  content_fy,
  aes(
    x = MonthN,
    y = net_new_Commons_content_pages,
    group = fiscal_year,
    colour = fiscal_year,
    size = fiscal_year,
    linetype = fiscal_year
  )
) +
  geom_line() +
  # Position and grouping are inherited from the plot-level mapping; point
  # size and colour are fixed constants.
  geom_point(
    data = filter(content_fy, fiscal_year == "FY 2021/22"),
    size = 4,
    colour = "#14866d"
  ) +
  # Only one size scale is added: a second scale for the same aesthetic
  # replaces the first and makes ggplot2 emit a "scale already present"
  # message. These are the values that were effectively in use.
  scale_size_manual(values = c(1, 1.2, 1.3, 2.2)) +
  scale_colour_manual(
    values = c(
      "FY 2019/20" = "#00fcc5",
      "FY 2020/21" = "#00af89",
      "FY 2021/22" = "#14866d"
    )
  ) +
  scale_linetype_manual(values = c("twodash", "dashed", "solid")) +
  scale_x_discrete(breaks = content_fy$MonthN, labels = content_fy$Month) +
  scale_y_continuous(
    name = "Monthly Net New Commons content",
    labels = polloi::compress
  ) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 18, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 24, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly Net New Commons content")

p1


In [None]:
# New-editor retention from July 2019 onward, tagged with:
#   fiscal_year - label of the July-June fiscal year the month falls in
#   MonthN      - two-digit month as a factor ordered July..June
#   Month       - abbreviated month name used for axis labels
# The "FY 2018/19" branch is unreachable with the current filter; it is kept
# (with the label corrected from the "FY 2018/17" typo) so that widening the
# filter does not mislabel rows.
retention_fy <- contributor %>%
  select(month, new_editor_retention) %>%
  filter(month >= as.Date("2019-07-01")) %>%
  mutate(
    fiscal_year = case_when(
      month < as.Date("2019-07-01") ~ "FY 2018/19",
      month < as.Date("2020-07-01") ~ "FY 2019/20",
      month < as.Date("2021-07-01") ~ "FY 2020/21",
      # Everything from July 2021 on is labelled FY 2021/22, as before.
      month >= as.Date("2021-07-01") ~ "FY 2021/22"
    ),
    MonthN = factor(
      format(as.Date(month), "%m"),
      levels = c("07", "08", "09", "10", "11", "12",
                 "01", "02", "03", "04", "05", "06")
    ),
    Month = months(as.Date(month), abbreviate = TRUE)
  )



In [None]:

# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# New editor retention: one line per fiscal year on a shared July-June axis,
# with the FY 2021/22 months marked by points; y axis shown as percentages.
p1 <- ggplot(
  retention_fy,
  aes(
    x = MonthN,
    y = new_editor_retention,
    group = fiscal_year,
    colour = fiscal_year,
    size = fiscal_year,
    linetype = fiscal_year
  )
) +
  geom_line() +
  # Position and grouping are inherited from the plot-level mapping; point
  # size and colour are fixed constants.
  geom_point(
    data = filter(retention_fy, fiscal_year == "FY 2021/22"),
    size = 4,
    colour = "#14866d"
  ) +
  scale_x_discrete(breaks = retention_fy$MonthN, labels = retention_fy$Month) +
  scale_y_continuous(
    name = "Monthly New Editor Retention",
    labels = scales::percent,
    limits = c(0.04, 0.095)
  ) +
  scale_colour_manual(
    values = c(
      "FY 2019/20" = "#00fcc5",
      "FY 2020/21" = "#00af89",
      "FY 2021/22" = "#14866d"
    )
  ) +
  scale_linetype_manual(values = c("twodash", "dashed", "solid")) +
  scale_size_manual(values = c(1.1, 1.1, 1.3, 2)) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 18, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 24, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "New Editor Retention")

p1

## Anonymous edits testing

In [None]:
# Anonymous vs. non-anonymous edit counts from July 2020 onward, reshaped to
# long form (one row per month and edit type, in `variable` / `value`) and
# tagged with fiscal year, July..June month factor and abbreviated month name.
anoy_fy <- contributor %>%
  select(month, anonymous_edits, non_anonymous_edits) %>%
  filter(month >= as.Date("2020-07-01")) %>%
  melt(
    id.vars = "month",
    measure.vars = c("anonymous_edits", "non_anonymous_edits")
  ) %>%
  mutate(
    fiscal_year = if_else(
      month < as.Date("2021-07-01"),
      "FY 2020/21",
      "FY 2021/22"
    ),
    MonthN = factor(
      format(as.Date(month), "%m"),
      levels = c("07", "08", "09", "10", "11", "12",
                 "01", "02", "03", "04", "05", "06")
    ),
    Month = months(as.Date(month), abbreviate = TRUE)
  )


In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# Anonymous and non-anonymous monthly edits on one chart: colour encodes the
# edit type, line type and width encode the fiscal year, and the FY 2021/22
# months are marked with points.
p1 <- ggplot(anoy_fy, aes(x = MonthN)) +
  # x is inherited from the plot-level mapping in both layers.
  geom_line(
    aes(
      y = value,
      group = interaction(fiscal_year, variable),
      colour = variable,
      size = fiscal_year,
      linetype = fiscal_year
    )
  ) +
  geom_point(
    data = filter(anoy_fy, fiscal_year == "FY 2021/22"),
    aes(
      y = value,
      group = interaction(fiscal_year, variable),
      colour = variable
    ),
    size = 4
  ) +
  scale_x_discrete(breaks = anoy_fy$MonthN, labels = anoy_fy$Month) +
  scale_y_continuous(name = "Monthly Edits", labels = polloi::compress) +
  scale_colour_manual(
    values = c(
      "anonymous_edits" = "#dd3333",
      "non_anonymous_edits" = "#14866d"
    )
  ) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  scale_size_manual(values = c(1.3, 1.4)) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 18, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 24, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly Anonymous/non-Anonymous Edits")

p1

In [None]:
# Split the long-form edits table by edit type:
# `anoy_fy_t` holds the anonymous rows, `anoy_fy_f` the non-anonymous rows.
anoy_fy_t <- filter(anoy_fy, variable == "anonymous_edits")
anoy_fy_f <- filter(anoy_fy, variable == "non_anonymous_edits")

In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# Monthly anonymous edits: one line per fiscal year on a shared July-June
# axis, with the FY 2021/22 months marked by points.
p1 <- ggplot(anoy_fy_t, aes(x = MonthN)) +
  geom_line(
    aes(
      y = value,
      group = fiscal_year,
      colour = fiscal_year,
      size = fiscal_year,
      linetype = fiscal_year
    )
  ) +
  # Points come from the anonymous-only table. Taking them from the combined
  # `anoy_fy` also fed non-anonymous values into this chart, which were only
  # hidden because they fall outside the y limits (with a "removed rows"
  # warning).
  geom_point(
    data = filter(anoy_fy_t, fiscal_year == "FY 2021/22"),
    aes(y = value, group = fiscal_year),
    size = 4,
    colour = "#dd3333"
  ) +
  scale_size_manual(values = c(1.3, 1.3)) +
  scale_colour_manual(
    values = c("FY 2020/21" = "#b32424", "FY 2021/22" = "#b32424")
  ) +
  scale_linetype_manual(values = c("dashed", "solid", "solid")) +
  scale_x_discrete(breaks = anoy_fy_t$MonthN, labels = anoy_fy_t$Month) +
  scale_y_continuous(
    name = "Monthly Anonymous Edits",
    labels = polloi::compress,
    limits = c(1.5E6, 2.5E6)
  ) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 18, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 24, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly Anonymous Edits")

p1

In [None]:
# Canvas size for the notebook plot output.
options(repr.plot.width = 14, repr.plot.height = 5)

# Monthly non-anonymous edits: one line per fiscal year on a shared July-June
# axis, with the FY 2021/22 months marked by points.
p1 <- ggplot(anoy_fy_f, aes(x = MonthN)) +
  geom_line(
    aes(
      y = value,
      group = fiscal_year,
      colour = fiscal_year,
      size = fiscal_year,
      linetype = fiscal_year
    )
  ) +
  # Points come from the non-anonymous-only table. Taking them from the
  # combined `anoy_fy` also fed anonymous values into this chart, which were
  # only hidden because they fall outside the y limits (with a "removed rows"
  # warning).
  geom_point(
    data = filter(anoy_fy_f, fiscal_year == "FY 2021/22"),
    aes(y = value, group = fiscal_year),
    size = 4,
    colour = "#14866d"
  ) +
  scale_size_manual(values = c(1.2, 1.3)) +
  scale_colour_manual(
    values = c("FY 2020/21" = "#14866d", "FY 2021/22" = "#14866d")
  ) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  scale_x_discrete(breaks = anoy_fy_f$MonthN, labels = anoy_fy_f$Month) +
  scale_y_continuous(
    name = "Monthly non-Anonymous Edits",
    labels = polloi::compress,
    limits = c(3E7, 6.5E7)
  ) +
  ggthemes::theme_tufte(base_size = 16, base_family = "serif") +
  theme(
    plot.title = element_text(size = 20, hjust = 0, face = "bold"),
    axis.title = element_text(size = 16, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = 0, size = 18, family = "Courier"),
    axis.text.y = element_text(hjust = 0, size = 24, family = "Courier"),
    panel.grid = element_line(colour = "gray80"),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(hjust = 0.5, size = 12),
    legend.key.width = unit(1.5, "cm")
  ) +
  labs(title = "Monthly non-Anonymous Edits")

p1