In [None]:
# To generate a PDF output (box scores look ugly):
# jupyter nbconvert --to pdf --template notebooks/hidecode notebooks/top5.ipynb

# To generate an HTML output:
# jupyter nbconvert --no-input --to html notebooks/top5.ipynb

In [None]:
import datetime as dt
import functools
from pathlib import Path

import numpy as np
import pandas as pd
from IPython.display import display, Markdown


In [None]:
# Configuration
#
# Values a report run is expected to tune. The season-specific directories
# are all derived from the two root directories and the season id.

# What the report covers: the season, and the game day whose box scores are shown.
season_id = 25
week_date = dt.datetime(2019, 10, 24)

# Players need at least this many games to be listed in the leader tables.
PLAYERS_MIN_GAMES_PLAYED = 1

# Root directories (relative paths).
raw_data_dir = Path("../../raw_data")
reports_dir = Path("../../reports")

# Per-season locations below the roots.
season_raw_data_dir = raw_data_dir.joinpath(f"season{season_id}")
season_reports_dir = reports_dir.joinpath(f"season{season_id}")
season_box_scores_dir = season_raw_data_dir.joinpath("box_scores")

In [None]:
# Report header: title, the period covered, a generation timestamp and a
# table of contents that mirrors the section headings used further down.
display(Markdown("# thegame.report"))
display(Markdown(f"Season {season_id}, week of {week_date.strftime('%Y-%m-%d')}"))

# Use a timezone-aware "now": datetime.utcnow() is deprecated since Python 3.12
# and its DeprecationWarning may otherwise surface in the rendered report.
# The timestamp is labelled explicitly so readers know it is UTC.
generated_at = dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
display(Markdown(f"Report generated at {generated_at}"))

display(Markdown("### Contents"))
display(Markdown(" 1. Labels"))
display(Markdown(" 2. This Week's Box Scores"))
# Worded exactly like the section 3 heading ("Stats Leaders ...").
display(Markdown(" 3. Stats Leaders (Teams and Individuals)"))

In [None]:
# Load the glossary of stat abbreviations (rendered by the next cell), then
# open section 1 of the report.
measure_descriptions = pd.read_csv(raw_data_dir.joinpath("stat_descriptions.csv"))
display(Markdown("## 1. Labels"))

In [None]:
# Show only the two columns a reader needs: the abbreviation and its meaning.
display(measure_descriptions.loc[:, ["abbreviation", "description"]])

In [None]:

# Leaderboard definitions, one entry per subsection of the leaders section.
#
# Every entry is a three-item list [title, sort_fields, sort_ascending]:
#   title          - heading displayed above the tables
#   sort_fields    - measures to rank by, in priority order; all of them are
#                    also shown as table columns
#   sort_ascending - one flag per sort field, matched by position
#                    (False puts the highest value first)
stats_groups = [
    ["Performance Index Rating",
     ["pir", "pts", "reb", "ast", "tov", "stl", "blk", "blka", "fc", "fd"],
     [False, False, False, False, True, False, False, True, True, False]],
    ["Points",
     ["pts", "fg%", "fgm", "fga", "3p%", "3pm", "3pa", "ft%", "ftm", "fta"],
     [False, False, False, True, False, False, True, False, False, True]],
    ["Rebounds",
     ["reb", "oreb", "dreb"],
     [False, False, False]],
    ["Offensive Rebounds",
     ["oreb", "dreb", "reb"],
     [False, False, False]],
    ["Assists",
     ["ast", "ast_tov_ratio", "tov"],
     [False, False, True]],
    ["Steals",
     ["stl", "tov", "fc"],
     [False, True, True]],
    ["Blocks",
     ["blk", "blka"],
     [False, True]],
    ["Turnovers",
     ["tov"],
     [False]],
    ["Fouls Committed",
     ["fc", "fd"],
     [False, True]],
    ["Field Goal Percentage",
     ["fg%", "fgm", "fga"],
     [False, False, True]],
    ["Three Point Shot Percentage",
     ["3p%", "3pm", "3pa"],
     [False, False, True]],
    ["Free Throw Percentage",
     ["ft%", "ftm", "fta"],
     [False, False, True]],
    ["Plus Minus Points",
     ["plus_minus_pts"],
     [False]],
]


def ast_tov_ratio(r):
    """Return the assist-to-turnover ratio for a row of totals.

    Args:
        r: Mapping that provides the "ast" and "tov" values.

    Returns:
        ``ast / tov`` rounded to one decimal place. When ``tov`` is zero (or
        otherwise falsy) the ratio is undefined and the ``ast`` value itself
        is returned unchanged.
    """
    turnovers = r["tov"]
    if turnovers:
        # Scale by 10, round to an integer, scale back: one decimal place.
        return round(10.0 * r["ast"] / turnovers) / 10
    return r["ast"]


def percentage(r, prefix):
    """Return a made/attempted percentage for one kind of shot.

    Args:
        r: Mapping that provides the "<prefix>m" (made) and "<prefix>a"
            (attempted) values.
        prefix: Key prefix selecting the shot type, e.g. "fg", "3p" or "ft".

    Returns:
        ``100 * made / attempted`` rounded to one decimal place, or ``0``
        when there were no attempts.
    """
    attempted = r[f"{prefix}a"]
    if attempted:
        made = r[f"{prefix}m"]
        # Scale to per-mille, round to an integer, then back to percent.
        return round(1000.0 * made / attempted) / 10
    return 0



# Derived measures: measure name -> function that computes the value from a
# row-like mapping of plain measures. The three shooting percentages share
# one implementation, specialised through the key prefix of the shot type.
calculated_measures = {
    **{
        f"{shot}%": functools.partial(percentage, prefix=shot)
        for shot in ("fg", "3p", "ft")
    },
    "ast_tov_ratio": ast_tov_ratio,
}


# Measures aggregated for individual players. The order is significant only
# in that the lists derived from this one keep the same order.
enabled_player_measures = [
    "pts",
    "fgm", "fga", "fg%",
    "3pm", "3pa", "3p%",
    "ftm", "fta", "ft%",
    "oreb", "dreb", "reb",
    "ast", "stl", "tov",
    "blk", "blka",
    "fc", "fd",
    "pir", "plus_minus_pts", "seconds_played",
    "ast_tov_ratio",
]

# Measures aggregated for teams: the player list without "pir",
# "plus_minus_pts" and "seconds_played".
enabled_team_measures = [
    "pts",
    "fgm", "fga", "fg%",
    "3pm", "3pa", "3p%",
    "ftm", "fta", "ft%",
    "oreb", "dreb", "reb",
    "ast", "stl", "tov",
    "blk", "blka",
    "fc", "fd",
    "ast_tov_ratio",
]

# Explicit display headings for columns whose heading is not simply the
# upper-cased column name.
# NOTE(review): "seconds_played" is labelled "min" although the column name
# says seconds - confirm the unit of the underlying data.
measure_labels = {
    "ast_tov_ratio": "AST / TOV",
    "player_gender": "g",
    "plus_minus_pts": "+/-",
    "seconds_played": "min",
}


def fix_column_names(measure_names):
    """Turn raw column names into the headings shown in the report.

    A name listed in ``measure_labels`` is replaced by its label verbatim.
    Any other name loses a trailing "_name", has its underscores replaced by
    spaces and is upper-cased (e.g. "player_name" -> "PLAYER",
    "pts_avg" -> "PTS AVG").

    Args:
        measure_names: Iterable of column names (strings).

    Returns:
        A new list of headings, in the same order as the input.
    """

    def heading(name):
        if name in measure_labels:
            return measure_labels[name]
        if name.endswith("_name"):
            name = name[: -len("_name")]
        return name.replace("_", " ").upper()

    return [heading(name) for name in measure_names]


# Split the enabled measures into "plain" ones (summed and averaged directly
# from the log columns) and "calculated" ones (derived through
# calculated_measures). Each list keeps the order of the list it is taken from.
plain_player_measures = [m for m in enabled_player_measures if m not in calculated_measures]
plain_team_measures = [m for m in enabled_team_measures if m not in calculated_measures]

calculated_player_measures_sorted = [m for m in enabled_player_measures if m in calculated_measures]
calculated_team_measures_sorted = [m for m in enabled_team_measures if m in calculated_measures]

In [None]:
# Box scores of this week's games

display(Markdown("## 2. This Week's Box Scores"))

# Box scores are shown in full, so lift pandas' row/column truncation limits.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Path.iterdir() yields entries in arbitrary order; sorting makes the report
# list the games in the same order on every run.
for p in sorted(season_box_scores_dir.iterdir()):
    # A box score file is recognised by a YYYYMMDD game date at the start of
    # its name. Anything else found in the directory (sub-directories, editor
    # or OS metadata files) is skipped instead of aborting the whole report.
    if not p.is_file():
        continue
    try:
        g_date = dt.datetime.strptime(p.stem[:8], "%Y%m%d").date()
    except ValueError:
        continue
    if g_date != week_date.date():
        continue

    box_score = (
        pd.read_csv(p)
        # Internal identifier, not meant for readers; absent column is fine.
        .drop(columns=["player_id"], errors="ignore")
        # Render missing values as empty cells rather than "NaN".
        .fillna("")
    )
    box_score.columns = fix_column_names(box_score.columns)
    display(box_score)

In [None]:
display(Markdown("## 3. Stats Leaders (Teams and Individuals)"))

# Season logs for players and teams. The aggregation below counts the rows of
# each player / team as their games played.
player_logs = pd.read_csv(season_raw_data_dir.joinpath("player_logs.csv"))
team_logs = pd.read_csv(season_raw_data_dir.joinpath("team_logs.csv"))


def sort_fields_to_avg_sort_fields(sort_fields):
    """Map sort fields to the totals columns that are actually ranked on.

    Plain measures are ranked by their per-game average, stored in the
    "<measure>_avg" column. Calculated measures have no average column and
    keep their own name.

    Args:
        sort_fields: Iterable of measure names.

    Returns:
        A list of column names, in the same order as ``sort_fields``.
    """
    ranked_columns = []
    for field in sort_fields:
        if field in calculated_measures:
            ranked_columns.append(field)
        else:
            ranked_columns.append(f"{field}_avg")
    return ranked_columns


def _totals_record(identity, group, plain_measures, calculated_names):
    """Build one totals record from the log rows of a single player or team.

    The record holds, in this order: the identity fields, "gp" (number of log
    rows, i.e. games played), the sum of every plain measure, the mean of
    every plain measure under "<measure>_avg", and finally the calculated
    measures, which are evaluated on the record itself and therefore on the
    summed values rather than per game.
    """
    record = dict(identity)
    record["gp"] = len(group)  # games played
    for measure in plain_measures:
        record[measure] = group[measure].sum()
    for measure in plain_measures:
        record[f"{measure}_avg"] = group[measure].mean()
    for measure in calculated_names:
        record[measure] = calculated_measures[measure](record)
    return record


# One record per (team, player, gender) combination found in the player logs.
player_totals = []
for (team_id, player_id, player_gender), group in player_logs.groupby(
    ["team_id", "player_id", "player_gender"]
):
    player_identity = {
        "team_id": team_id,
        "team_name": group["team_name"].iloc[0],
        "player_id": player_id,
        "player_gender": player_gender,
        "player_name": group["player_name"].iloc[0],
    }
    player_totals.append(
        _totals_record(
            player_identity,
            group,
            plain_player_measures,
            calculated_player_measures_sorted,
        )
    )

# One record per team found in the team logs.
team_totals = []
for team_id, group in team_logs.groupby("team_id"):
    team_identity = {
        "team_id": team_id,
        "team_name": group["team_name"].iloc[0],
    }
    team_totals.append(
        _totals_record(
            team_identity,
            group,
            plain_team_measures,
            calculated_team_measures_sorted,
        )
    )

ptdf = pd.DataFrame(player_totals)
ttdf = pd.DataFrame(team_totals)

# Number of rows shown per table.
TEAM_LEADERS_SHOWN = 8
PLAYER_LEADERS_SHOWN = 10


def _leaderboard(totals, id_columns, sort_fields, sort_ascending, limit):
    """Return the first ``limit`` rows of ``totals`` ranked for one stats group.

    Rows are sorted on the columns given by sort_fields_to_avg_sort_fields().
    The result keeps the identity columns, the ranked columns and then any
    sort field that was ranked through its "_avg" column, is numbered from 1
    and carries display headings.
    """
    ranked_columns = sort_fields_to_avg_sort_fields(sort_fields)
    remaining_fields = [f for f in sort_fields if f not in ranked_columns]

    ranked = totals.sort_values(ranked_columns, ascending=sort_ascending)
    board = (
        ranked[[*id_columns, *ranked_columns, *remaining_fields]]
        .head(limit)
        .reset_index(drop=True)
    )
    board.index += 1  # ranks start at 1
    board.columns = fix_column_names(board.columns)
    return board


for title, sort_fields, sort_ascending in stats_groups:
    display(Markdown(f"### {title}"))

    # Team table: only when every sort field is available for teams.
    if all(sf in enabled_team_measures for sf in sort_fields):
        display(
            _leaderboard(
                ttdf,
                ["team_name", "gp"],
                sort_fields,
                sort_ascending,
                TEAM_LEADERS_SHOWN,
            )
        )

    # Player tables: one per gender ("f" first, then "m"), restricted to
    # players with the minimum number of games played.
    if all(sf in enabled_player_measures for sf in sort_fields):
        with_min_games = ptdf["gp"] >= PLAYERS_MIN_GAMES_PLAYED
        for gender in ("f", "m"):
            this_gender = ptdf["player_gender"] == gender
            display(
                _leaderboard(
                    ptdf[this_gender & with_min_games],
                    ["player_name", "team_name", "gp"],
                    sort_fields,
                    sort_ascending,
                    PLAYER_LEADERS_SHOWN,
                )
            )
