In [1]:
import pandas as pd
from plotnine import *
from datetime import date
from pathlib import Path

In [2]:
# Download the Open Philanthropy grants spreadsheet as CSV.
# The request is sent with a browser-like User-Agent header; without the
# storage options the server answers with HTTP 403.
data_url = "https://www.openphilanthropy.org/giving/grants/spreadsheet"
storage_options = {"User-Agent": "Mozilla/5.0"}

df = pd.read_csv(
    data_url,
    storage_options=storage_options,
)

In [3]:
# Normalise column types:
#   * "Amount": drop "$" and "," characters, then parse as float
#   * "Date":   parse into pandas datetimes
#   * "year":   calendar year taken from "Date"
df["Amount"] = (
    df["Amount"]
    .replace(to_replace="[$,]", value="", regex=True)
    .astype(float)
)
df["Date"] = pd.to_datetime(df["Date"])
df["year"] = df["Date"].dt.year

In [5]:
# Total granted amount for every (Focus Area, year) pair, ordered from the
# smallest to the largest total, with a fresh 0..n-1 index. The extra
# "amount_millions" column holds the same total divided by one million.
df = (
    df.groupby(["Focus Area", "year"])["Amount"]
    .sum()
    .reset_index()
    .sort_values("Amount")
    .reset_index(drop=True)
    .assign(amount_millions=lambda totals: totals["Amount"] / 10**6)
)

In [6]:
# Make columns categorical in a reasonable order for nicer plotting.
#
# Focus Areas are ordered by their total amount summed over all years
# (smallest first). df holds one row per (Focus Area, year), so taking the
# order of first appearance in the amount-sorted rows would rank each area by
# its smallest single-year amount rather than by its overall funding.
df["Focus Area"] = pd.Categorical(
    df["Focus Area"],
    categories=(
        df.groupby("Focus Area", observed=True)["Amount"]
        .sum()
        .sort_values(kind="stable")
        .index
    ),
    ordered=True,
)

# Years become an ordered categorical and the rows are sorted chronologically.
df["year"] = pd.Categorical(df["year"], ordered=True)
df = df.sort_values("year")

In [7]:
# Shared base for the all-time charts: one horizontal bar per Focus Area.
#
# df holds one row per (Focus Area, year). Plotting it directly makes
# geom_bar(stat="identity") stack one segment per year, and every text layer
# that inherits this data prints one label per year on top of each other.
# Collapsing to a single total per Focus Area gives each bar exactly one value
# and one label. The grouped column keeps its categorical dtype, so the bar
# order defined on df["Focus Area"] is preserved.
base_graph = (
    ggplot(
        df.groupby("Focus Area", observed=True, as_index=False)["amount_millions"].sum(),
        aes(x="Focus Area", y="amount_millions"),
    )
    + geom_bar(stat="identity", fill="#0c869b")
    + coord_flip()  # horizontal bars: Focus Area names read left to right
    + ylab("Total grants (Million $)")
    + ggtitle(f"Openphil funding by Focus Area, up until {date.today()}")
)

In [None]:
# Linear-scale chart: the base bars plus a value label next to each bar.
normal_graph = (
    base_graph
    + theme(aspect_ratio=1)
    # Label text is the amount in millions with one decimal, left-aligned and
    # nudged by 1 along the value axis.
    + geom_text(
        aes(label="amount_millions"),
        color="#555555",
        ha="left",
        format_string="{:.1f}",
        nudge_y=1,
    )
    # No padding at the lower end of the value axis; 20% multiplicative
    # expansion at the upper end.
    + scale_y_continuous(expand=(0, 0, 0.2, 0))
)

# Last expression of the cell: render the chart.
normal_graph

In [None]:
# Log-scale chart: the same bars on a log10 value axis, with value labels.
log_graph = (
    base_graph
    # No padding at the lower end of the value axis; 15% multiplicative
    # expansion at the upper end.
    + scale_y_log10(expand=(0, 0, 0.15, 0))
    # Replaces the axis label set on base_graph.
    + ylab("Total grants (Million $, log scale)")
    + theme(aspect_ratio=1)
    # Label text is the amount in millions with one decimal, left-aligned.
    + geom_text(
        aes(label="amount_millions"),
        color="#555555",
        ha="left",
        format_string="{:.1f}",
    )
)

# Last expression of the cell: render the chart.
log_graph

In [None]:
# Write both charts as PNG files into ./pngs, creating the directory
# (and any missing parents) if it does not exist yet.
Path("pngs").mkdir(parents=True, exist_ok=True)
name = "alltime-graph-cause-areas"
for graph, target in (
    (normal_graph, f'pngs/{name}.png'),
    (log_graph, f'pngs/{name}_log_scale.png'),
):
    graph.save(target)