In [1]:
import pandas as pd
from plotnine import *
from datetime import date
import textwrap
from pathlib import Path

In [2]:
# Download the Open Philanthropy grants spreadsheet as CSV.
# A browser-like User-Agent header is passed through storage_options;
# without it the request is answered with HTTP 403.
data_url = "https://www.openphilanthropy.org/giving/grants/spreadsheet"
storage_options = {"User-Agent": "Mozilla/5.0"}

df = pd.read_csv(
    data_url,
    storage_options=storage_options,
)

In [3]:
# Normalise the raw columns:
#   - "Amount":     strip "$" and "," and cast to float
#   - "Date":       parse into datetimes, then derive a "year" column from it
#   - "Focus Area": wrap each label into lines of at most 15 characters
df["Amount"] = (
    df["Amount"]
    .replace("[$,]", "", regex=True)
    .astype(float)
)

df["Date"] = pd.to_datetime(df["Date"])
df["year"] = df["Date"].dt.year

df["Focus Area"] = df["Focus Area"].map(lambda label: textwrap.fill(label, width=15))

In [4]:
# Keep only the rows dated on or after 2021-01-01 (earlier rows are dropped)
is_2021_or_later = df["Date"] >= "2021-01-01"
df = df.loc[is_2021_or_later]

In [5]:
# Collapse to one row per Focus Area holding the summed Amount
df = df.groupby("Focus Area", as_index=False)["Amount"].sum()

# Express the totals in millions for the plot axis and labels
df["amount_millions"] = df["Amount"] / 1_000_000

In [6]:
# Order the Focus Area labels by ascending total Amount and make the column an
# ordered categorical, so the bars are plotted in that order.
totals_by_area = df.groupby("Focus Area", as_index=False)["Amount"].sum()
focus_areas = totals_by_area.sort_values("Amount", ignore_index=True)["Focus Area"]

df["Focus Area"] = pd.Categorical(
    df["Focus Area"],
    categories=focus_areas,
    ordered=True,
)

In [None]:
# Shared base: flipped (horizontal) bar chart of amount_millions per Focus Area.
base_graph = (
    ggplot(df, aes(x="Focus Area", y="amount_millions"))
    + geom_col(fill="#0c869b")
    + coord_flip()
    + labs(
        y="Total grants (Million $)",
        title=f"Openphil funding from start of 2021 until {date.today()} by Focus Area",
    )
    + theme(aspect_ratio=1.5)
)

# Linear-scale variant: value labels placed past the bar ends, with extra
# expansion at the upper end of the value axis.
standard_graph = (
    base_graph
    + geom_text(
        aes(label="amount_millions"),
        color="#555555",
        ha="left",
        format_string="{:.1f}",
        nudge_y=1,
    )
    + scale_y_continuous(expand=(0, 0, 0.2, 0))
)

standard_graph

In [None]:
# Log-scale variant of the bar chart.
#
# A plot holds a single scale per aesthetic: adding scale_y_continuous() after
# scale_y_log10() replaces the log scale, which left this chart on a linear
# axis. The axis expansion is therefore passed to scale_y_log10() directly.
#
# NOTE(review): with a log10 scale, nudge_y is presumably applied in transformed
# (log10) units, so the former 0.5 would push labels about half a decade past
# the bar ends; 0.05 keeps them adjacent. Confirm the offset visually.
log_graph = (
    base_graph
    + scale_y_log10(expand=(0, 0, 0.15, 0))
    + geom_text(
        aes(label="amount_millions"),
        color="#555555",
        ha="left",
        format_string="{:.1f}",
        nudge_y=0.05,
    )
    + ylab("Total grants (Million $, log scale)")
    + theme(aspect_ratio=1.5)
)

log_graph

In [None]:
import os

# Write the linear-scale chart to pngs/<name>.png, creating the folder if needed
name = "2021-now-graph-per-cause-area"
Path("pngs").mkdir(exist_ok=True, parents=True)
standard_graph.save(f'pngs/{name}.png')