In [None]:
from pathlib import Path

import pandas as pd


# Directory the notebook reads its input CSV logs from and writes its generated reports to.
# The path is relative, so it is resolved against the kernel's current working directory.
reports_dir = Path("../../reports")

In [None]:
# Load the per-player game logs from the reports directory
player_logs = pd.read_csv(reports_dir.joinpath("player_logs.csv"))

In [None]:
# Load the per-team game logs from the reports directory
team_logs = pd.read_csv(reports_dir.joinpath("team_logs.csv"))

In [None]:
# Add "outcome" column: label each row by comparing the team's points with the opponent's.
# Rows matching none of the comparisons (e.g. missing scores) keep a missing outcome.
outcome_masks = (
    ("W", team_logs.pts > team_logs.opponent_pts),  # win
    ("T", team_logs.pts == team_logs.opponent_pts),  # tie
    ("D", team_logs.pts < team_logs.opponent_pts),  # defeat
)
for outcome_label, outcome_mask in outcome_masks:
    team_logs.loc[outcome_mask, "outcome"] = outcome_label

In [None]:
# To compare the effect of a player's presence vs absence, we need to know all players who have played for a team
# in a season. We create a column for each player and put a "Y" if they played that game for the particular team
# or "OPP" if they were the opponent


def _player_column(player_id):
    """Return the name of the team_logs flag column for ``player_id`` (dashes become underscores)."""
    return f"p_{player_id.replace('-', '_')}"


# dict.fromkeys drops repeated column names while keeping first-seen order
player_columns = list(
    dict.fromkeys(_player_column(pid) for pid in player_logs["player_id"].unique())
)

# First initialise all values to "N". The flags live in their own frame and are attached to
# team_logs with a single concat below: inserting one column per player directly into
# team_logs fragments the DataFrame (pandas emits a PerformanceWarning for this).
player_flags = pd.DataFrame("N", index=team_logs.index, columns=player_columns)

for (game_id, team_id), group in player_logs.groupby(["game_id", "team_id"]):
    is_this_team = (team_logs.game_id == game_id) & (team_logs.team_id == team_id)
    is_other_team = (team_logs.game_id == game_id) & (team_logs.opponent_id == team_id)
    # Every player of this (game, team) roster receives the same flags, so each flag value
    # is written once for all of the roster's columns instead of once per player.
    roster_columns = [_player_column(pid) for pid in group["player_id"].unique()]
    player_flags.loc[is_this_team, roster_columns] = "Y"
    player_flags.loc[is_other_team, roster_columns] = "OPP"

# Drop flag columns left over from a previous run of this cell, then attach the fresh ones
team_logs = pd.concat(
    [team_logs.drop(columns=player_columns, errors="ignore"), player_flags],
    axis=1,
)

In [None]:
# Write output: team logs including the per-player flag columns, floats rounded to 2 decimals
team_logs_output_path = reports_dir / "team_logs_with_players.csv"
team_logs.to_csv(team_logs_output_path, index=False, float_format="%.2f")

In [None]:
# For every (player, team) pair, summarise the team's games with the player ("y_" prefix)
# and without the player ("n_" prefix), then derive with-vs-without differences.
stat_names = ["num_games", "defensive_eff", "offensive_eff", "possessions", "ast_total", "tov_total"]

player_reports = []
for player_id in player_logs["player_id"].unique():
    player_column = f"p_{player_id.replace('-', '_')}"
    players_teams = player_logs[player_logs.player_id == player_id]["team_id"].unique()
    for team_id in players_teams:
        report = {
            "player_id": player_id,
            "team_id": team_id,
        }
        team_games = team_logs[team_logs.team_id == team_id]
        # Group by the column name itself, not a one-element list: with a list grouper
        # pandas >= 2.0 yields 1-tuples such as ("Y",) as keys, which would never equal
        # "OPP" and have no .lower().
        for presence, group in team_games.groupby(player_column):
            if presence == "OPP":
                continue
            prefix = presence.lower()  # "y" (played) or "n" (did not play)
            report.update({
                f"{prefix}_num_games": len(group.index),
                f"{prefix}_defensive_eff": group["defensive_eff"].mean(),
                f"{prefix}_offensive_eff": group["offensive_eff"].mean(),
                f"{prefix}_possessions": group["possessions"].mean(),
                f"{prefix}_ast_total": group["ast"].sum(),
                f"{prefix}_tov_total": group["tov"].sum(),
            })
        player_reports.append(report)

# Passing the columns explicitly guarantees that every y_/n_ column exists (filled with NaN)
# even when no player has any game on one side of the comparison.
x_factors = pd.DataFrame(
    player_reports,
    columns=[
        "player_id", "team_id",
        *[f"{prefix}_{stat}" for prefix in ("y", "n") for stat in stat_names],
    ],
)

# Defensive improvement is "without minus with"; the other metrics are "with minus without".
x_factors["defensive_eff_improv"] = x_factors["n_defensive_eff"] - x_factors["y_defensive_eff"]
x_factors["offensive_eff_improv"] = x_factors["y_offensive_eff"] - x_factors["n_offensive_eff"]
x_factors["possessions_increase"] = x_factors["y_possessions"] - x_factors["n_possessions"]
# NOTE(review): a zero turnover total makes the corresponding ratio inf/NaN — confirm that is acceptable
x_factors["ast_tov_ratio_increase"] = (
    (x_factors["y_ast_total"] / x_factors["y_tov_total"])
    - (x_factors["n_ast_total"] / x_factors["n_tov_total"])
)

x_factors["games_with"] = x_factors["y_num_games"]
x_factors["games_without"] = x_factors["n_num_games"]


columns = [
    "player_id", "team_id",
    "games_with", "games_without",
    "defensive_eff_improv", "offensive_eff_improv",
    "possessions_increase", "ast_tov_ratio_increase",
]

# Exclude players who have played fewer than 2 games or have been away for fewer than 2 games
# (a missing count is NaN, which compares False and is therefore excluded as well)
at_least_two_games = (x_factors.games_with >= 2) & (x_factors.games_without >= 2)


# Sort
x_factors = x_factors[at_least_two_games].sort_values(
    ["defensive_eff_improv", "offensive_eff_improv"],
    ascending=[False, False],
)


# Write output
x_factors.to_csv(
    reports_dir / "x_factors.csv",
    float_format="%.2f",
    columns=columns,
    index=False,
)

In [None]:

# Player who has played for most teams

# Count the distinct teams per player in one vectorised pass. Grouping by the column name
# (not a one-element list) keeps player_id a plain value: with a list grouper pandas >= 2.0
# yields 1-tuples as group keys. dropna=False counts a missing team_id as its own value,
# matching what `unique().size` would report.
players = (
    player_logs.groupby("player_id")["team_id"]
    .nunique(dropna=False)
    .reset_index(name="num_teams")
)

at_least_two = players["num_teams"] >= 2
players[at_least_two].sort_values(["num_teams"], ascending=[False]).head(30)


In [None]:
# Ratio of PIR to enthusiasm rating for each player-game row
player_logs["pir_enth_ratio"] = player_logs["pir"].div(player_logs["enthusiasm_rating"])

# Keep only rows with a PIR of at least 5, then show the 20 highest ratios
filtered = player_logs["pir"].ge(5)
(
    player_logs.loc[filtered]
    .sort_values(by="pir_enth_ratio", ascending=False)
    .loc[:, ["game_id", "player_id", "team_id", "opponent_id", "enthusiasm_rating", "pir", "pir_enth_ratio"]]
    .head(20)
)

In [None]:
# Show the 20 player-game rows with the highest enthusiasm rating
(
    player_logs
    .sort_values(by="enthusiasm_rating", ascending=False)
    .loc[:, ["game_id", "player_id", "team_id", "opponent_id", "enthusiasm_rating", "pir"]]
    .head(20)
)