In [1]:
import pandas as pd
from plotnine import *
from pathlib import Path

In [2]:
# Download the Open Philanthropy grants spreadsheet straight into a DataFrame.
# A User-Agent header is passed via storage_options; without it the request
# is rejected with a 403.
data_url = "https://www.openphilanthropy.org/giving/grants/spreadsheet"
storage_options = {"User-Agent": "Mozilla/5.0"}

df = pd.read_csv(
    data_url,
    storage_options=storage_options,
)

In [3]:
# Normalise column types: drop "$" and "," from Amount and cast it to float,
# parse Date into datetimes, then derive a year column from the parsed dates.
amount_text = df["Amount"].replace("[$,]", "", regex=True)
df["Amount"] = amount_text.astype(float)
df["Date"] = pd.to_datetime(df["Date"])
df["year"] = df["Date"].dt.year

In [4]:
# Keep only the grants dated within the selected calendar year
# (1 January inclusive up to, but not including, 1 January of the next year).
year = 2021
period_start = f'{year}-01-01'
period_end = f'{year + 1}-01-01'
on_or_after_start = df["Date"] >= period_start
before_end = df["Date"] < period_end
df = df.loc[on_or_after_start & before_end]

In [5]:
# Total the grant amounts for every (Focus Area, year) pair, then order the
# rows from the smallest total to the largest with a fresh 0..n-1 index.
totals = df.groupby(["Focus Area", "year"])["Amount"].sum()
df = totals.reset_index().sort_values("Amount", ignore_index=True)

# The same totals expressed in millions
df["amount_millions"] = df["Amount"] / 10**6

In [6]:
# Convert both columns to ordered categoricals for plotting.
# The Focus Area categories are listed in order of first appearance in the
# current rows, so the category order mirrors the present row order.
focus_area_order = df["Focus Area"].unique()
df["Focus Area"] = pd.Categorical(
    df["Focus Area"],
    categories=focus_area_order,
    ordered=True,
)
df["year"] = pd.Categorical(df["year"], ordered=True)
df = df.sort_values("year")

In [None]:
# Bar chart of amount_millions per Focus Area, with the axes flipped so the
# Focus Area names run down the side of the figure.
g = (
    ggplot(df, aes(x="Focus Area", y="amount_millions"))
    + geom_col()
    + coord_flip()
    + labs(
        y="Total grants (Million $)",
        title=f"Openphil funding in {year} by Focus Area",
    )
)

g

In [None]:
# Variant of the chart above with a log10 scale on the amount axis; the axis
# label is replaced so it states the log scale.
g_log = (
    g
    + scale_y_log10()
    + ylab("Total grants (Million $, log scale)")
)

g_log

In [None]:
# Write both charts as PNG files under ./pngs, creating the directory first
# if it does not exist yet.
# NOTE(review): the file stem says "alltime" although the chart title is
# specific to `year` -- confirm the intended output name before relying on it.
name = "alltime-graph-cause-areas"
output_dir = Path("pngs")
output_dir.mkdir(parents=True, exist_ok=True)

for plot, target in (
    (g, f'pngs/{name}.png'),
    (g_log, f'pngs/{name}_log_scale.png'),
):
    plot.save(target)