In [1]:
import pandas as pd
from plotnine import *
from datetime import date
from pathlib import Path

In [2]:
# Download the Open Philanthropy grants spreadsheet as a DataFrame.
# A User-Agent header has to be supplied through storage_options;
# without it the request is answered with HTTP 403.
storage_options = {"User-Agent": "Mozilla/5.0"}
data_url = "https://www.openphilanthropy.org/giving/grants/spreadsheet"

df = pd.read_csv(
    data_url,
    storage_options=storage_options,
)

In [3]:
# Normalise column types:
#   - "Date"   -> datetime64
#   - "year"   -> calendar year taken from "Date" (new column)
#   - "Amount" -> float, after stripping "$" and "," from the raw text
df["Date"] = pd.to_datetime(df["Date"])
df["year"] = df["Date"].dt.year
df["Amount"] = (
    df["Amount"]
    .replace(to_replace="[$,]", value="", regex=True)
    .astype(float)
)

In [4]:
# The current calendar year is still in progress, so its totals would be
# incomplete: keep only grants dated before 1 January of this year.
year = date.today().year
df = df.loc[df["Date"] < f'{year}-01-01']

In [5]:
# Rank focus areas by their total granted amount and keep the top n.
# The ranking is sorted ascending, so the last n rows are the largest.
n = 6
largest_focus_areas = (
    df.groupby("Focus Area", as_index=False)["Amount"]
    .sum()
    .sort_values("Amount", ignore_index=True)["Focus Area"]
    .tail(n)
)

# Restrict the grants to those top-n focus areas.
df = df.loc[df["Focus Area"].isin(largest_focus_areas)]

In [6]:
# Collapse the grants to one row per (Focus Area, year) holding the summed
# Amount, ordered by that sum, and add the same figure expressed in millions.
df = (
    df.groupby(["Focus Area", "year"], as_index=False)["Amount"]
    .sum()
    .sort_values("Amount", ignore_index=True)
    .assign(amount_millions=lambda totals: totals["Amount"] / 10**6)
)

In [7]:
# Convert the facet and x-axis columns to ordered categoricals so the plot
# uses a deliberate ordering: focus areas follow the order of
# largest_focus_areas, years follow the order pandas infers from the data.
# Finally sort the rows by year.
df = df.assign(
    **{
        "Focus Area": pd.Categorical(
            df["Focus Area"], categories=largest_focus_areas, ordered=True
        )
    },
    year=pd.Categorical(df["year"], ordered=True),
).sort_values("year")

In [None]:
# Bar chart of yearly grant totals (in millions), one facet per focus area.
graph = (
    ggplot(df, aes(x="year", y="amount_millions")) +
    geom_bar(stat='identity', fill="#0c869b") +
    # Label every bar with its value, one decimal place, anchored just above the bar.
    geom_text(aes(label="amount_millions"), color = '#555555', va = "bottom", format_string = "{:.1f}", size=8) +
    facet_wrap('Focus Area') +
    ylab("Total grants (Million $)") +
    # Interpolate n so the title stays correct if the number of focus areas kept is changed.
    ggtitle(f"Openphil funding by Focus Area, {n} largest focus areas") +
    theme(figure_size=(15, 8)) +
    # No padding below the bars; 10% multiplicative padding above to leave room for the labels.
    scale_y_continuous(expand=(0,0,0.1,0))
)

graph

In [None]:
# Write the figure to pngs/<name>.png, creating the output directory
# first if it does not exist yet.
name = "alltime-cause-areas-per-year"
Path("pngs").mkdir(exist_ok=True, parents=True)
graph.save(f"pngs/{name}.png")