In [1]:
import pandas as pd
import json
from pathlib import Path

CSV_PATH = "prizes.csv"

# Read every column as text so no field is coerced to a numeric dtype on load.
df = pd.read_csv(CSV_PATH, dtype=str)

# Normalise headers: trim surrounding whitespace and lower-case each name.
df.columns = [header.strip().lower() for header in df.columns]



In [2]:
# --- Keep only rows whose role is exactly "winner".
# .copy() makes the result an independent frame rather than a slice of the
# previous `df`, which is rebound here.
is_winner = df["role"].eq("winner")
df = df.loc[is_winner].copy()

# --- Tag prizes of interest with a bucket label (only the Whiting Award is bucketed so far; no rows are dropped here)
def bucket_prize(name):
    """Map a raw prize name onto its canonical bucket label.

    Parameters
    ----------
    name : object
        Prize name as read from the table. Non-string values (e.g. NaN or
        None) are accepted and yield None.

    Returns
    -------
    str or None
        "Whiting Award" when `name` is a string containing "whiting"
        (case-insensitive); None for any other input.
    """
    # keyword (lower-case substring) -> bucket label
    keyword_to_bucket = {"whiting": "Whiting Award"}

    if isinstance(name, str):
        lowered = name.lower()
        for keyword, bucket in keyword_to_bucket.items():
            if keyword in lowered:
                return bucket

    return None

# Label every row with its prize bucket (None where bucket_prize finds no match).
df["prize_bucket"] = df["prize_name"].map(bucket_prize)


In [13]:
# Rows whose prize name is exactly 'Whiting Award'.
is_whiting = df['prize_name'].eq('Whiting Award')
whiting_df = df.loc[is_whiting]

# NOTE(review): despite its name, this frame is selected from `df`, not from
# `whiting_df`, so it is not restricted to Whiting Award rows — confirm intent.
is_stegner = df['stegner'].eq('Stegner')
stegner_whiting_df = df.loc[is_stegner]

len(whiting_df)

348

In [21]:
# Show the rows flagged 'Stegner' (rich display; pandas elides the middle rows of a long frame).
stegner_whiting_df

Unnamed: 0,full_name,gender,mfa_degree,role,prize_year,prize_name,prize_amount,prize_institution,prize_type,prize_genre,author_wikidata,author_viaf,author_lccn,elite_institution,graduate_degree,stegner,title_of_winning_book
69,Marilyn Chin,female,University of Iowa,judge,2020,Jean Stein Book Award,"$75,000.00",PEN America,book,no genre,Q6763512,47840838,n88045226,Stanford University,graduate,Stegner,Where Reasons End
72,Marilyn Chin,female,University of Iowa,judge,2020,Pulitzer Prize,"$15,000.00",Columbia University,book,poetry,Q6763512,47840838,n88045226,Stanford University,graduate,Stegner,The Tradition
85,Jenny Offill,female,,judge,2020,First Novel Prize,"$15,000.00",Center for Fiction,book,prose,Q15522372,79457224,n99002654,Stanford University,graduate,Stegner,Luster
120,Marilyn Chin,female,University of Iowa,judge,2020,Academy of American Poets Fellowship,"$25,000.00",Academy of American Poets,career,poetry,Q6763512,47840838,n88045226,Stanford University,graduate,Stegner,
121,Marilyn Chin,female,University of Iowa,winner,2020,Ruth Lilly Poetry Prize,"$100,000.00",Poetry Foundation,career,poetry,Q6763512,47840838,n88045226,Stanford University,graduate,Stegner,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6043,Thom Gunn,male,,winner,1964,Arts and Letters Awards,"$10,000.00",American Academy of Arts and Letters,career,no genre,Q2141920,100054570,n79066390,Stanford University,graduate,Stegner,
6169,Donald Hall,male,,judge,1961,Bollingen Prize for Poetry,"$10,000.00",Yale University,career,poetry,Q976924,49226675,n79043673,"Harvard University, Stanford University",graduate,Stegner,
6223,Donald Hall,male,,judge,1960,Bollingen Prize for Poetry,"$10,000.00",Yale University,career,poetry,Q976924,49226675,n79043673,"Harvard University, Stanford University",graduate,Stegner,
6691,Paul Bowles,male,,winner,1950,Arts and Letters Awards,"$10,000.00",American Academy of Arts and Letters,career,no genre,Q358342,112464042,n79043627,Stanford University,graduate,Stegner,


In [16]:
import pandas as pd
import json

# Re-read the table from disk with pandas' default dtype inference; this
# rebinds `df`, and no dtype=str or header normalisation is applied here.
df = pd.read_csv("prizes.csv")

# Keep only rows that are Whiting Award entries with the "winner" role.
is_whiting = df["prize_name"] == "Whiting Award"
is_winner = df["role"] == "winner"
winners = df[is_whiting & is_winner]

def _label_or(value, fallback):
    """Return `value`, or `fallback` when it is missing (NaN/None) or an empty string."""
    if pd.isna(value) or value == "":
        return fallback
    return value


# Each winner row contributes one unit of flow along the chain
#   Institution → MFA → Stegner → "Whiting Award"
# NOTE(review): node labels are not stage-qualified, so a label that occurs in
# two stages would be a single shared node — confirm the renderer copes with that.
links = []
for _, winner in winners.iterrows():
    stages = [
        _label_or(winner["elite_institution"], "No Elite Institution"),
        _label_or(winner["mfa_degree"], "No MFA"),
        _label_or(winner["stegner"], "No Stegner"),
        "Whiting Award",
    ]
    # Pair every stage with its successor: three links per winner row.
    links.extend(
        {"source": origin, "target": destination, "value": 1}
        for origin, destination in zip(stages, stages[1:])
    )

# Collapse identical (source, target) pairs, summing their values so that
# repeated paths become a single, heavier link.
links_df = (
    pd.DataFrame(links)
    .groupby(["source", "target"], as_index=False)
    .sum()
)

print("Total Whiting winners:", winners["full_name"].nunique())
print("Links in Sankey:", links_df.shape[0])


Total Whiting winners: 348
Links in Sankey: 183


In [17]:
# Number of rows currently in the link table.
links_df.shape[0]

183

In [15]:
# How many total Whiting winners in your dataset?
# The role filter is required for the printed label to be accurate: without it
# every row with this prize name is counted, whatever its role. It also keeps
# `winners`, which this cell rebinds, defined the same way as in the
# link-building cell.
winners = df[(df["prize_name"] == "Whiting Award") & (df["role"] == "winner")]
print("Total Whiting winners:", len(winners["full_name"].unique()))

# How many links did you build?
# `links_df` comes from the link-building cell, which must have been run first.
print("Links in sankey:", len(links_df))


Total Whiting winners: 348
Links in sankey: 10


In [19]:
import pandas as pd

# Reload the complete prize table from disk (rebinds `df`).
df = pd.read_csv("prizes.csv")

# Row masks: role is compared case-insensitively, and the prize name only has
# to contain "Whiting Award" (any case); missing prize names never match.
is_winner = df["role"].str.lower() == "winner"
is_whiting = df["prize_name"].str.contains("Whiting Award", case=False, na=False)

# Columns kept in the exported table, in output order.
export_columns = [
    "full_name",
    "prize_year",
    "mfa_degree",
    "elite_institution",
    "stegner",
    "prize_name",
]

whiting_winners = df.loc[is_winner & is_whiting, export_columns]

# De-duplication is currently disabled, so repeated rows are exported as-is:
# whiting_winners = whiting_winners.drop_duplicates()

# Write the table without the index column.
whiting_winners.to_csv("whiting_winners.csv", index=False)


