In [1]:
import duckdb

import pandas as pd
import numpy as np

In [36]:
def get_season_schedule(db, year):
    """
    Return the games of one season from the ``game_features`` table.

    Parameters
    ----------
    db :
        Object exposing ``.sql(query)`` whose result offers ``.df()``
        (the notebook passes a DuckDB connection).
    year :
        Season to select. The value is interpolated directly into the SQL
        text, so it must be a trusted value such as an int.

    Returns
    -------
    Whatever ``.df()`` returns for the query: the columns Year, Week,
    Home_Team, Away_Team and Home_Won, ordered by Week, Home_Team, Away_Team.
    """
    season_query = f"""
        SELECT
            Year, Week, Home_Team, Away_Team,
            Home_Won
        FROM game_features
        WHERE Year = {year}
        ORDER BY Week, Home_Team, Away_Team
        """
    relation = db.sql(season_query)
    return relation.df()

def get_weekly_winners(schedule_df):
    """
    Group the winning team of every game by week.

    A game is credited to ``Home_Team`` only when its ``Home_Won`` value
    equals 1; any other value (0, a missing value, ...) credits ``Away_Team``.

    Parameters
    ----------
    schedule_df : pd.DataFrame
        One row per game with the columns Week, Home_Team, Away_Team and
        Home_Won.

    Returns
    -------
    dict
        Maps each week to the list of winning teams, in row order.
    """
    winners_by_week = {}
    for game in schedule_df.to_dict("records"):
        weekly_list = winners_by_week.setdefault(game['Week'], [])
        victor = game['Home_Team'] if game['Home_Won'] == 1 else game['Away_Team']
        weekly_list.append(victor)
    return winners_by_week

def load_best_paths(path):
    """
    Load candidate paths from a CSV and rank them best-first.

    Parameters
    ----------
    path :
        Anything ``pd.read_csv`` accepts (file path or file-like object).
        The data must contain a ``log_prob`` column.

    Returns
    -------
    pd.DataFrame
        All rows ordered by descending ``log_prob``, re-indexed 0..n-1 so
        that a row's index label equals its rank.
    """
    ranked = pd.read_csv(path).sort_values(
        by=['log_prob'],
        ascending=False,
        # A stable sort keeps rows with equal log_prob in file order, so the
        # top-k slices taken by callers are reproducible.
        kind='mergesort',
    )
    # sort_values keeps the original row labels; drop them so code that
    # treats the index as a position (0 = best path) sees consistent values.
    return ranked.reset_index(drop=True)

def load_greedy_path(year):
    """
    Load the saved greedy path for ``year`` as a one-row DataFrame.

    Reads ``./results/greedy_path_{year}_k10000.json`` with ``pd.read_json``,
    flattens every value of the resulting frame into a single row, and names
    the columns ``week_1`` ... ``week_N`` in flattened order.

    NOTE(review): this presumes the flattened values are the picks for
    consecutive weeks starting at week 1 -- confirm against the file layout.
    """
    with open(f"./results/greedy_path_{year}_k10000.json", "r") as f:
        raw_frame = pd.read_json(f)

    # The file is fully parsed at this point, so the reshaping can happen
    # after the handle is closed.
    flat_picks = raw_frame.values.reshape(1, -1)
    week_labels = [f"week_{n}" for n in range(1, flat_picks.shape[1] + 1)]
    return pd.DataFrame(flat_picks, columns=week_labels)


In [19]:
def eval_year(best_paths, winners, max_week=18):
    """
    Return the fewest wrong picks made by any path in ``best_paths``.

    A pick is wrong when the team in column ``week_{n}`` is not among
    ``winners[n]``. Weeks whose column is absent from ``best_paths`` are
    skipped.

    Parameters
    ----------
    best_paths : pd.DataFrame
        One candidate path per row, with pick columns named ``week_1``,
        ``week_2``, ... Other columns are ignored. The index may hold any
        labels (e.g. after sorting or filtering).
    winners : mapping
        Week number -> collection of teams that won that week.
    max_week : int, default 18
        Highest week number to score.

    Returns
    -------
    int
        The minimum error count over all rows.

    Raises
    ------
    ValueError
        If ``best_paths`` has no rows.
    KeyError
        If a scored week is missing from ``winners``.
    """
    n_paths = len(best_paths)
    if n_paths == 0:
        raise ValueError("best_paths is empty; there is no path to evaluate")

    errors = [0] * n_paths
    for wk in range(1, max_week + 1):
        column = f"week_{wk}"
        if column not in best_paths.columns:
            continue

        week_winners = winners[wk]
        # Count by row position, not by index label: labels survive
        # sort_values()/filtering and are not valid offsets into `errors`.
        for position, pick in enumerate(best_paths[column]):
            if pick not in week_winners:
                errors[position] += 1

    return min(errors)


In [40]:
# Summary column -> number of top-ranked beam paths evaluated for it.
top_k_by_column = {
    "best_path": 1,
    "best_3_paths": 3,
    "best_10_paths": 10,
    "best_50_paths": 50,
    "best_100_paths": 100,
}

# Week value embedded in the beam result file names (``wk-{week}``).
week = 1

# Collect one record per season; `df` is only bound once the full table
# exists, so it never temporarily holds a plain list.
records = []

# Open the database once for all seasons instead of once per season.
with duckdb.connect("./data/data.db") as db:
    for year in range(2013, 2025):
        schedule_df = get_season_schedule(db, year)
        winners = get_weekly_winners(schedule_df)
        paths = load_best_paths(f"./results/beam_{year}_wk-{week}_k10000.csv")
        g_path = load_greedy_path(year)

        season_row = {"year": year}
        # `paths` is ordered best-first, so the first k rows are the top k.
        for column, k in top_k_by_column.items():
            season_row[column] = eval_year(paths.iloc[:k], winners)
        season_row["greedy_path"] = eval_year(g_path, winners)
        records.append(season_row)

df = pd.DataFrame(
    records,
    columns=["year", *top_k_by_column, "greedy_path"],
)

df

Unnamed: 0,year,best_path,best_3_paths,best_10_paths,best_50_paths,best_100_paths,greedy_path
0,2013,6,3,3,1,1,5
1,2014,2,2,2,1,1,3
2,2015,4,4,3,3,2,4
3,2016,3,3,1,1,1,2
4,2017,6,5,3,2,2,4
5,2018,3,3,3,3,3,4
6,2019,3,3,1,1,1,2
7,2020,5,4,4,3,3,1
8,2021,4,3,3,2,2,1
9,2022,6,6,5,4,3,3


In [56]:
import plotly.graph_objects as go
import numpy as np

# Columns of `df` to plot; `colors` is paired with `methods` by position.
methods = [
    "best_path",
    "best_3_paths",
    "best_10_paths",
    "best_50_paths",
    "best_100_paths",
    "greedy_path"
]
colors = [
    "deepskyblue", "limegreen", "gold", "orange", "orangered", "violet"
]

fig = go.Figure()

# One line per method, plus a dotted horizontal line at that method's mean.
for method, color in zip(methods, colors):
    # Human-readable name shared by the legend entry and the annotation.
    label = method.replace("_", " ").title()
    fig.add_trace(go.Scatter(
        x=df["year"],
        y=df[method],
        mode="lines+markers",
        name=label,
        line=dict(width=2, color=color),
        marker=dict(size=5)
    ))
    avg = df[method].mean()
    fig.add_hline(
        y=avg,
        line_dash="dot",
        line_color=color,
        annotation_text=f"Avg {label}: {avg:.2f}",
        annotation_position="bottom right" if method == "greedy_path" else "top right",
        annotation_font_color=color,
        opacity=0.5,
        annotation_font_size=16,
    )

# Take the season range from the data itself so the title cannot drift from
# the seasons that were actually evaluated.
first_season = df["year"].min()
last_season = df["year"].max()

fig.update_layout(
    title=f"Model Evaluation by Season ({first_season}-{last_season})",
    xaxis_title="Season (Year)",
    yaxis_title="Evaluation Score (Lower is Better)",
    template="plotly_dark",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="white"
    ),
    legend_title_text="Evaluation Method",
    title_x=0.5,
    height=800,
)

fig.show()
