In [None]:
from pathlib import Path

import pandas as pd


# Directory holding the input and output CSV reports. The path is relative, so it is
# resolved against the working directory the notebook kernel runs in.
reports_dir = Path("..", "..", "reports")

In [None]:
# Load the player logs from the reports directory. Later cells read its game_id,
# team_id, player_id, opponent_id, pir and enthusiasm_rating columns.
player_logs = pd.read_csv(reports_dir.joinpath("player_logs.csv"))

In [None]:
# Load the team logs from the reports directory. Later cells read its game_id,
# team_id, opponent_id, pts and opponent_pts columns.
team_logs = pd.read_csv(reports_dir.joinpath("team_logs.csv"))

In [None]:
# Add an "outcome" column that labels each row by comparing pts with opponent_pts:
# "W" (win), "T" (tie) or "D" (defeat). Rows where none of the comparisons hold
# (e.g. a missing score) keep a missing outcome.
points_for = team_logs["pts"]
points_against = team_logs["opponent_pts"]
outcome_rules = (
    ("W", points_for > points_against),  # win
    ("T", points_for == points_against),  # tie
    ("D", points_for < points_against),  # defeat
)
for outcome_label, outcome_mask in outcome_rules:
    team_logs.loc[outcome_mask, "outcome"] = outcome_label

In [None]:
# To compare the effect of a player's presence vs. absence, we need to know every player who has played for a team
# in a season. We create one column per player and put a "Y" in it if they played that game for the row's team,
# or "OPP" if they played that game for the opponent.

for (game_id, team_id), roster in player_logs.groupby(["game_id", "team_id"]):
    # Rows of team_logs that describe this game ...
    same_game = team_logs["game_id"] == game_id
    # ... from this team's own perspective, and from its opponent's perspective.
    own_rows = same_game & (team_logs["team_id"] == team_id)
    opponent_rows = same_game & (team_logs["opponent_id"] == team_id)
    for pid in roster["player_id"]:
        # Column name is "p_" + player id, with dashes turned into underscores.
        column = "p_" + pid.replace("-", "_")
        team_logs.loc[own_rows, column] = "Y"
        team_logs.loc[opponent_rows, column] = "OPP"

In [None]:
# Write the enriched team logs next to the input reports.
# NOTE: index=False is not passed, so the DataFrame index is written as the first
# (unnamed) CSV column.
output_path = reports_dir / "team_logs_with_players.csv"
team_logs.to_csv(output_path)

In [None]:

# Players who have played for the most teams

# Count the distinct team_id values logged for every player_id in one aggregation.
# Grouping by the scalar key "player_id" (rather than the list ["player_id"]) keeps
# the player_id values plain scalars: with a one-element list, pandas >= 2.0 yields
# 1-tuples such as ("abc",) as group keys, which would end up in the player_id column.
# dropna=False mirrors the previous `unique().size`, which counted a missing team_id
# as its own value.
players = (
    player_logs.groupby("player_id")["team_id"]
    .nunique(dropna=False)
    .rename("num_teams")
    .reset_index()
)

# Keep only players seen with two or more teams and show the top 30 by team count.
at_least_two = players["num_teams"] >= 2
players[at_least_two].sort_values(["num_teams"], ascending=[False]).head(30)


In [None]:
# Ratio of pir to enthusiasm_rating for every player log row, stored as a new column.
# NOTE(review): a zero enthusiasm_rating would produce an infinite ratio and sort to
# the top of the ranking below - confirm whether that is intended.
player_logs["pir_enth_ratio"] = player_logs["pir"].div(player_logs["enthusiasm_rating"])

# Only rank rows with a pir of at least 5, then show the 20 highest ratios.
filtered = player_logs["pir"].ge(5)
ratio_columns = [
    "game_id",
    "player_id",
    "team_id",
    "opponent_id",
    "enthusiasm_rating",
    "pir",
    "pir_enth_ratio",
]
player_logs.loc[filtered].sort_values("pir_enth_ratio", ascending=False)[ratio_columns].head(20)

In [None]:
# The 20 player log rows with the highest enthusiasm_rating.
enthusiasm_columns = ["game_id", "player_id", "team_id", "opponent_id", "enthusiasm_rating", "pir"]
by_enthusiasm = player_logs.sort_values(by="enthusiasm_rating", ascending=False)
by_enthusiasm.loc[:, enthusiasm_columns].head(20)