In [None]:
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# 0 is best, 2 is worst
def brier(probs, idx_correct):
    tot = sum(probs)
    if tot == 0:
        return 2
    probs = [p/tot for p in probs]
    score = 0
    for i in range(len(probs)):
        expected = int(i == idx_correct) # 1 or 0
        score += (probs[i] - expected) ** 2
    return score

# Quick sanity checks: print the score for a few hand-picked forecasts,
# each given as (weights, index of the correct option).
sanity_cases = [
    ([100, 0, 0, 0], 0),
    ([100, 0, 0, 0], 3),
    ([0.1, 0.1, 0.1, 0.7], 3),
    ([0.1, 0.1, 0.1, 0.7], 1),
    ([0.15, 0.15, 0.15, 0.55], 3),
    ([0.15, 0.15, 0.15, 0.55], 1),
    ([1, 2, 3, 4], 0),
]
for case_weights, case_answer in sanity_cases:
    print(brier(case_weights, case_answer))

In [None]:
# Answer choices offered for every question, and the guess columns that hold
# the weight a player put on each choice.
options = "ABCD"
opt_cols = [f"Probability of {opt}" for opt in options]

pdf = pd.read_csv("players.csv") # player DataFrame
# Map email -> display name. If an email appeared twice in the index,
# dict(series) would map it to a Series of names instead of a single string,
# so keep only the first registration per email (same policy as for guesses).
players = dict(
    pdf.drop_duplicates(subset="Email Address", keep="first")
    .set_index("Email Address")["What do you want your display name to be?"]
)

gdf = pd.read_csv("guesses.csv") # guess DataFrame
# Only the first guess a player submitted for a given question counts.
gdf = gdf.drop_duplicates(subset=["Email Address", "Question Number"], keep="first")

adf = pd.read_csv("answers.csv").set_index("qnum") # answer DataFrame
# The first character of the "correct" column is the letter of the right option.
adf["correct_letter"] = adf["correct"].apply(lambda s: s[:1])

In [None]:
def row_brier(row):
    """Brier score for one guess row.

    Looks up the correct letter for the row's "Question Number" in ``adf``
    and scores the row's option weights against it. A question with no entry
    in ``adf`` scores 2.
    """
    question = row["Question Number"]
    if question in adf.index:
        answer_letter = adf.loc[question, "correct_letter"]
        weights = list(row[opt_cols])
        return brier(weights, options.index(answer_letter))
    return 2

# Score every guess and attach the results as the "brier" column in a single
# assignment, instead of creating and filling the column one cell at a time
# through .loc. The explicit float64 dtype keeps the column numeric (and
# present) even when there are no guesses at all.
gdf["brier"] = pd.Series(
    [row_brier(guess) for _, guess in gdf.iterrows()],
    index=gdf.index,
    dtype="float64",
)

In [None]:
def score_user(email, start, end):
    """Total Brier score of one player over questions ``start``..``end`` inclusive.

    Each question the player has a guess for contributes that guess's "brier"
    value; each question without a guess contributes 2.
    """
    own_guesses = gdf.loc[gdf["Email Address"] == email]
    by_question = own_guesses.set_index("Question Number", drop=False)
    total = 0
    for question in range(start, end + 1):
        if question in by_question.index:
            total += by_question.loc[question, "brier"]
        else:
            total += 2
    return total

def score_users(start, end):
    """Leaderboard of every registered player over questions ``start``..``end``.

    Returns:
        A DataFrame with columns "player" (display name) and "brier" (the
        player's total from ``score_user``), sorted ascending by "brier".
        With no registered players the result is an empty frame that still
        has both columns.
    """
    rows = [
        {"player": alias, "brier": score_user(email, start, end)}
        for email, alias in players.items()
    ]
    # Naming the columns up front keeps sort_values from raising KeyError
    # on "brier" when there are no players and `rows` is empty.
    return pd.DataFrame(rows, columns=["player", "brier"]).sort_values(by="brier")
# Standings for question 1 only.
score_users(1, 1)

In [None]:
def score_table(start, end):
    """Question-by-player table of Brier scores for questions ``start``..``end``.

    Rows are the question numbers in the range that exist in ``adf``; the
    first column is the question text and every further column is one
    player's display name. Cells without a guess are filled with 2. A row
    labelled 0 whose "question" is "average" holds each player's column mean,
    and the result is sorted by row label.
    """
    table = pd.DataFrame()
    # One row per answered question, created by label so "question" is column 0.
    for question in range(start, end + 1):
        if question not in adf.index:
            continue
        table.loc[question, "question"] = adf.loc[question, "question"]

    # Drop each guess into its (question, player) cell; guesses from unknown
    # emails or for questions outside the table are skipped.
    for _, guess in gdf.iterrows():
        who = guess["Email Address"]
        which = guess["Question Number"]
        if who not in players or which not in table.index:
            continue
        table.loc[which, players[who]] = guess["brier"]

    table = table.fillna(2)
    player_means = table.iloc[:, 1:].mean(axis=0)
    # Adding label 0 appends a new last row, which the next line then fills.
    # NOTE(review): this relies on 0 not already being a question number.
    table.loc[0, "question"] = "average"
    table.iloc[-1, 1:] = player_means
    return table.sort_index()

# Build the table before opening the output file, so that a failure inside
# score_table does not leave an empty, truncated scores.html behind.
scores = score_table(1, 20)
# Display names are free text entered by players, so write UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open("scores.html", "w", encoding="utf-8") as f:
    scores.round(2).to_html(f)
# After sort_index the "average" row (label 0) comes first; drop its
# "question" cell and rank the players by their mean score.
ranked = scores.iloc[0,1:].sort_values()
print(ranked.mean(), ranked.median())
ranked

In [None]:
# Large font so the horizontal bar labels stay readable on the tall figure.
plt.rcParams.update({"font.size": 32})
# Mean score across players for every row of the table, lowest first.
(
    scores
    .set_index("question")
    .mean(axis=1)
    .sort_values()
    .plot.barh(figsize=(8, 16))
)