In [None]:
import datetime as dt
import functools
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from scipy import stats


In [None]:
# Notebook configuration: base directories, the season/week under analysis,
# and pandas display settings.

raw_data_dir = Path("../../raw_data")
reports_dir = Path("../../reports")

season_id = 25
week_date = dt.datetime(year=2019, month=10, day=31)

# Season-specific locations derived from the base directories above.
season_raw_data_dir = raw_data_dir.joinpath(f"season{season_id}")
season_reports_dir = reports_dir.joinpath(f"season{season_id}")
season_box_scores_dir = season_raw_data_dir.joinpath("box_scores")

# Render floats with a thousands separator and two decimals in DataFrame output.
pd.set_option("display.float_format", "{:,.2f}".format)

In [None]:
# Field goal log for the configured season, read from that season's raw data
# directory. The summary cell below reads its game_id, team_id, player_id,
# points_scored and outcome columns.
field_goals = pd.read_csv(season_raw_data_dir.joinpath("field_goals.csv"))
# Quick look while developing: field_goals.head(5)

In [None]:
# Raw player logs for the configured season, as read from disk.
player_logs_raw = pd.read_csv(season_raw_data_dir.joinpath("player_logs.csv"))

# NOTE: `player_logs` is a GroupBy object keyed by (game_id, team_id, player_id),
# not a DataFrame. Pull the rows for a single key with get_group, e.g.
#   player_logs.get_group(("20191017-1845-hotshots-rockets", "hotshots", "hotshots-harrison"))["stl"]
player_logs = player_logs_raw.groupby(by=["game_id", "team_id", "player_id"])

In [None]:
"""
true_fga includes fouled shots, but does not include free throw attempts.
points_scored includes points scored with free throws.
points_scored_weighed discounts missed shots which result in an offensive rebound.
"""


summary_rows = []
for (game_id, team_id, player_id), group in field_goals.groupby(["game_id", "team_id", "player_id"]):
    is_dreb = (group["points_scored"] == 0) & (group["outcome"] == "DefensiveRebound")
    is_oreb = (group["points_scored"] == 0) & (group["outcome"] == "OffensiveRebound")
    true_fga = len(group.index)
    true_fg_missed = len(group[is_dreb | is_oreb].index)
    num_miss_and_oreb = len(group[is_oreb].index)
    num_miss_and_dreb = len(group[is_dreb].index)
    row = {
        "game_id": game_id,
        "team_id": team_id,
        "player_id": player_id,
        "points_scored_total": group["points_scored"].sum(),
        "true_fga": true_fga,
        "points_scored": group["points_scored"].mean(),
        "points_scored_weighed": group["points_scored"].sum() / (true_fga - num_miss_and_oreb) if group["points_scored"].sum() else 0,
        "miss_and_oreb": len(group[is_oreb].index) / true_fg_missed if true_fg_missed else 0,
        "miss_and_dreb": len(group[is_dreb].index) / true_fg_missed if true_fg_missed else 0,
        
    }
    summary_rows.append(row)

summary = pd.DataFrame(summary_rows)
summary_sorted = summary.sort_values(["points_scored_weighed", "miss_and_oreb", "true_fga"], ascending=[False, False, True])
summary_sorted.reset_index(drop=True, inplace=True)
summary_sorted.index += 1
summary_sorted
