In [1]:
import pandas as pd

In [2]:
# Read every column as text and replace missing cells with empty strings,
# producing the frame in one expression instead of mutating it in place.
pbp = pd.read_csv("./Data/NBA-PBP-2018-2019.csv", dtype=str).fillna('')

In [3]:
# Group the play-by-play rows per game, keyed by (Date, HomeTeam, AwayTeam).
# NOTE(review): this rebinds `pbp` from the loaded DataFrame to the grouped
# object, so the cell is not idempotent -- re-run the load cell before re-running it.
pbp = pbp.groupby(["Date", "HomeTeam", "AwayTeam"])

In [4]:
# Materialise the groups: one entry per game, mapping the group key to that game's rows.
pbp_dict = {game_key: game_rows for game_key, game_rows in pbp}

In [5]:
# List every game key, one per line (iterating a dict yields its keys).
for key in pbp_dict:
    print(key)

('April 1 2019', 'BOS', 'MIA')
('April 1 2019', 'BRK', 'MIL')
('April 1 2019', 'DAL', 'PHI')
('April 1 2019', 'IND', 'DET')
('April 1 2019', 'MIN', 'POR')
('April 1 2019', 'NYK', 'CHI')
('April 1 2019', 'PHO', 'CLE')
('April 1 2019', 'TOR', 'ORL')
('April 1 2019', 'UTA', 'CHO')
('April 10 2019', 'ATL', 'IND')
('April 10 2019', 'BRK', 'MIA')
('April 10 2019', 'CHO', 'ORL')
('April 10 2019', 'DEN', 'MIN')
('April 10 2019', 'LAC', 'UTA')
('April 10 2019', 'MEM', 'GSW')
('April 10 2019', 'MIL', 'OKC')
('April 10 2019', 'NYK', 'DET')
('April 10 2019', 'PHI', 'CHI')
('April 10 2019', 'POR', 'SAC')
('April 10 2019', 'SAS', 'DAL')
('April 13 2019', 'DEN', 'SAS')
('April 13 2019', 'GSW', 'LAC')
('April 13 2019', 'PHI', 'BRK')
('April 13 2019', 'TOR', 'ORL')
('April 14 2019', 'BOS', 'IND')
('April 14 2019', 'HOU', 'UTA')
('April 14 2019', 'MIL', 'DET')
('April 14 2019', 'POR', 'OKC')
('April 15 2019', 'GSW', 'LAC')
('April 15 2019', 'PHI', 'BRK')
('April 16 2019', 'DEN', 'SAS')
('April 16 2019',

In [40]:
import json
from datetime import datetime
# Event schema consulted by make_event. Each event name maps to a 4-tuple:
#   (allowed types, allowed outcomes, allowed relation keys, allowed attribute keys)
# An empty list in the type or outcome slot means the event carries no such field.
player_event_dict = {
    "TryPt": (
        [
            "3-pt jump shot",
            "2-pt jump shot",
            "2-pt hook shot",
            "2-pt layup",
            "2-pt tip-in",
            "2-pt dunk",
        ],
        ["make", "miss"],
        ["AssistedBy", "BlockedBy"],
        ["Dist"],
    ),
    "FreeThrow": (
        [],
        ["make", "miss"],
        ["Fouler"],
        [],
    ),
    "Turnover": (
        [
            "3 sec",
            "5 sec",
            "8 sec",
            "back court",
            "bad pass",
            "dbl dribble",
            "discontinued dribble",
            "illegal assist",
            "inbound",
            "lane violation",
            "lost ball",
            "off goaltending",
            "offensive foul",
            "out of bounds lost ball",
            "palming",
            "shot clock",
            "step out of bounds",
            "traveling",
            "turnover",
        ],
        [],
        ["StolenBy"],
        [],
    ),
    "Rebound": (
        ["defensive", "offensive"],
        [],
        [],
        [],
    ),
    "Foul": (
        [
            "away from play",
            "clear path",
            "def 3 sec tech",
            "flagrant",
            "loose ball",
            "offensive",
            "personal",
            "personal take",
            "shooting",
            "technical",
        ],
        [],
        ["Fouled"],
        [],
    ),
    "Enter": (
        [],
        [],
        ["Replace"],
        [],
    ),
}

# Relation labels; not referenced by the other code visible in this notebook.
player_relations = [
    "Jumpball",
    "Replace",
    "Assist",
    "Steal",
    "Block",
]
def make_event(eName:str, ePlayer:str, eType:str=None, eOut:str=None, eRels:dict=None, eAttrs:dict=None):
    """Build one event record after validating it against ``player_event_dict``.

    Args:
        eName: Event name; must be a key of ``player_event_dict``.
        ePlayer: Label of the player the event belongs to (stored as given).
        eType: Event type. Must be one of the types allowed for ``eName``;
            may be ``None`` only when that event defines no types.
        eOut: Event outcome. Must be one of the outcomes allowed for ``eName``;
            may be ``None`` only when that event defines no outcomes.
        eRels: Optional mapping of relation key -> related player. Every key
            must be allowed for ``eName``. Relations are never mandatory.
        eAttrs: Optional mapping of attribute key -> value. Every key must be
            allowed for ``eName``.

    Returns:
        A dict with the keys ``name``, ``player``, ``type``, ``outcome``,
        ``relations`` and ``attributes`` (in that order).

    Raises:
        AssertionError: If any argument violates the schema.
    """
    def require(condition, message):
        # Raise explicitly rather than with an `assert` statement so the
        # validation still runs under `python -O`; the exception type is the same.
        if not condition:
            raise AssertionError(message)

    require(eName in player_event_dict, "Event name unrecognized, event name:{}".format(eName))
    allowed_types, allowed_outcomes, allowed_relations, allowed_attrs = player_event_dict[eName]

    if eType is not None:
        require(eType in allowed_types, "Event type unrecognized, event:{}, type:{}".format(eName, eType))
    else:
        require(len(allowed_types) == 0, "Event type missing")

    if eOut is not None:
        require(eOut in allowed_outcomes, "Event outcome unrecognized")
    else:
        require(len(allowed_outcomes) == 0, "Event outcome missing")

    # Relations are optional, but every supplied key must be known for this event.
    if eRels is not None:
        for rel in eRels:
            # Report the offending key itself (the message used to dump the whole dict).
            require(rel in allowed_relations,
                    "Event relation unrecognized, event:{}, relation:{}".format(eName, rel))

    if eAttrs is not None:
        for attr in eAttrs:
            require(attr in allowed_attrs, "Event attributes unrecognized")

    return {
        "name": eName,
        "player": ePlayer,
        "type": eType,
        "outcome": eOut,
        "relations": eRels,
        "attributes": eAttrs,
    }

class GameExtractor:
    """Turn play-by-play rows of a game into a JSON-serialisable event log.

    ``read_game`` converts the rows of one game into a dict, ``read_event``
    converts a single row into an event (via ``make_event``), and
    ``extract_all`` writes one JSON file per game.
    """

    # Columns holding "<player name> - <team>" labels whose team suffix is
    # checked and corrected by read_game.
    # NOTE(review): read_event also reads FreeThrowShooter, EnterGame and
    # LeaveGame, which are not corrected here -- confirm whether they need it.
    _PLAYER_COLS = (
        "Shooter", "Assister", "Blocker", "Fouler", "Fouled", "Rebounder",
        "TurnoverPlayer", "TurnoverCauser", "JumpballAwayPlayer", "JumpballHomePlayer",
    )

    def __init__(self):
        # These attributes are initialised here but not read by the methods below.
        self.current_game = None
        self.player_dict = {}
        self.team_dict = {}
        self.events = []

    @staticmethod
    def _split_player(label):
        """Return ``(name, team)`` for a "<name> - <team>" label, else ``None``.

        Empty cells, values without the " - " separator and labels starting
        with "Team" are not player labels. The split is done on the last
        separator so a name that itself contains " - " does not raise.
        """
        if not isinstance(label, str) or " - " not in label or label.startswith("Team"):
            return None
        name, team = label.rsplit(" - ", 1)
        return name, team

    def _resolve_player_teams(self, game_record, home_team, away_team):
        """Map each player name to its label with the majority team suffix.

        A name seen under both teams is assigned to the team it appears with
        more often (ties go to the home team), on the assumption that wrong
        labels are rarer than right ones.
        """
        home_counts, away_counts = {}, {}
        for col in self._PLAYER_COLS:
            for label in game_record[col]:
                parsed = self._split_player(label)
                if parsed is None:
                    continue
                name, team = parsed
                if team == home_team:
                    counts = home_counts
                elif team == away_team:
                    counts = away_counts
                else:
                    print("Invalid player encountered")
                    continue
                counts[name] = counts.get(name, 0) + 1

        canonical = {}
        for name, n_home in home_counts.items():
            team = home_team if n_home >= away_counts.get(name, 0) else away_team
            canonical[name] = name + " - " + team
        for name in away_counts:
            if name not in home_counts:
                canonical[name] = name + " - " + away_team
        return canonical

    def _canonical_label(self, label, canonical):
        """Return the corrected label, or ``label`` unchanged if it has no correction."""
        parsed = self._split_player(label)
        if parsed is None:
            return label
        # A player only ever seen with an unknown team has no entry; keep the
        # label as-is instead of raising KeyError.
        return canonical.get(parsed[0], label)

    @staticmethod
    def _attach_foulers(events):
        """Tag each free throw with the player who committed the preceding foul.

        After a "Foul" event, every "FreeThrow" that follows gets
        ``relations = {"Fouler": <fouling player>}``. "Enter" events in between
        are skipped over (substitutions can happen during free throws); any
        other event ends the run.
        """
        fouler = None
        for event in events:
            name = event["name"]
            if name == "Foul":
                fouler = event["player"]
            elif name == "FreeThrow":
                if fouler is not None:
                    event["relations"] = {"Fouler": fouler}
            elif name != "Enter":
                fouler = None

    def read_game(self, game_record):
        """Convert the play-by-play rows of one game into a game dict.

        Args:
            game_record: DataFrame with one row per play. It must contain the
                HomeTeam, AwayTeam and WinningTeam columns, the player columns
                listed in ``_PLAYER_COLS`` and whatever ``read_event`` reads.
                The frame is not modified.

        Returns:
            dict with the keys ``HomeTeam``, ``AwayTeam``, ``WinningTeam``
            (taken from the first row), ``events`` (events in row order) and
            ``players`` (sorted unique ``player`` values of the events).
        """
        first_row = game_record.iloc[0]
        home_team = first_row["HomeTeam"]
        away_team = first_row["AwayTeam"]
        game_info = {
            "HomeTeam": home_team,
            "AwayTeam": away_team,
            "WinningTeam": first_row["WinningTeam"],
        }

        # Some labels carry the wrong team suffix (e.g. "J. Embiid - BOS" where
        # "J. Embiid - PHI" is meant). Work out the right suffix per player and
        # rewrite the player columns on a copy: writing to the rows yielded by
        # iterrows() is not guaranteed to reach the frame.
        canonical = self._resolve_player_teams(game_record, home_team, away_team)
        corrected = game_record.copy()
        for col in self._PLAYER_COLS:
            corrected[col] = [self._canonical_label(label, canonical) for label in game_record[col]]

        events = []
        for _, row in corrected.iterrows():
            event = self.read_event(row)
            if event is not None:
                events.append(event)
        self._attach_foulers(events)

        game_info["events"] = events
        # Sorted so the output does not depend on set iteration order.
        game_info["players"] = sorted({event["player"] for event in events}, key=str)
        return game_info

    def read_event(self, row):
        """Translate one play-by-play row into an event, or ``None``.

        The play text is ``row["HomePlay"] + row["AwayPlay"]`` and is matched
        in this order: shot / free throw ("makes" or "misses"), "rebound",
        "Turnover", "foul", "enters". Rows that match nothing, team rebounds,
        and "makes"/"misses" plays that are neither a "pt" shot nor a free
        throw return ``None``.

        Raises:
            AssertionError: From ``make_event`` when the row's values do not
                fit the event schema.
            ValueError: If ``ShotDist`` of a shot row is not an integer string.
        """
        play = row["HomePlay"] + row["AwayPlay"]

        if "makes" in play or "misses" in play:
            if "pt" in play:
                # Collect both relations instead of letting a blocker overwrite an assister.
                rel = {}
                if row["Assister"] != "":
                    rel["AssistedBy"] = row["Assister"]
                if row["Blocker"] != "":
                    rel["BlockedBy"] = row["Blocker"]
                return make_event("TryPt", row["Shooter"], row["ShotType"], row["ShotOutcome"],
                                  rel or None, {"Dist": int(row["ShotDist"])})
            if "free throw" in play:
                return make_event("FreeThrow", row["FreeThrowShooter"], None,
                                  row["FreeThrowOutcome"], None, None)
            # Neither a field goal nor a free throw: skip the row rather than
            # fail on an unassigned local.
            return None

        if "rebound" in play:
            # A rebound credited to "Team" is recorded after the first free
            # throw of a set; it is not a player event, so it is ignored.
            if "Team" in play:
                return None
            return make_event("Rebound", row["Rebounder"], row["ReboundType"], None, None, None)

        if "Turnover" in play:
            rel = None
            if row["TurnoverCause"] == "steal":
                rel = {"StolenBy": row["TurnoverCauser"]}
            return make_event("Turnover", row["TurnoverPlayer"], row["TurnoverType"], None, rel, None)

        if "foul" in play:
            return make_event("Foul", row["Fouler"], row["FoulType"], None,
                              {"Fouled": row["Fouled"]}, None)

        if "enters" in play:
            return make_event("Enter", row["EnterGame"], None, None,
                              {"Replace": row["LeaveGame"]}, None)

        return None

    def extract_all(self, pbp_dict:dict, extract_path:str):
        """Parse every game in ``pbp_dict`` and write each one to a JSON file.

        Args:
            pbp_dict: Mapping of ``(date, home, away)`` -> game DataFrame, where
                ``date`` is formatted like "April 1 2019".
            extract_path: String prefix of the output files; include the
                trailing path separator when it names a directory. Each file is
                written to ``extract_path + "<YYYYMMDD>-<home>-<away>.json"``.
                The containing directory is created if it does not exist.
        """
        import os  # local import: only needed to create the output directory

        n_games = len(pbp_dict)
        print("Total Games: {}".format(n_games))
        for i, key in enumerate(pbp_dict):
            key_name = datetime.strptime(key[0], "%B %d %Y").strftime("%Y%m%d") + "-" + key[1] + "-" + key[2]
            print("Parse {}th game".format(i))
            game = self.read_game(pbp_dict[key])
            target = extract_path + key_name + ".json"
            out_dir = os.path.dirname(target)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            with open(target, "w") as f:
                json.dump(game, f)
            print("Saved {}th game".format(i))

In [41]:
# Create the extractor used by the cells below.
reader = GameExtractor()

In [42]:
# Pick one game's rows (keyed by date, home team, away team) as a sample input.
record = pbp_dict[('April 1 2019', 'BOS', 'MIA')]

In [43]:
# Parse the sample game into a dict of game info, events and players.
game = reader.read_game(record)

In [44]:
# Display the parsed game (the whole dict is echoed as the cell output).
game

{'HomeTeam': 'BOS',
 'AwayTeam': 'MIA',
 'WinningTeam': 'BOS',
 'events': [{'name': 'TryPt',
   'player': 'J. Tatum - BOS',
   'type': '3-pt jump shot',
   'outcome': 'miss',
   'relations': None,
   'attributes': {'Dist': 23}},
  {'name': 'Rebound',
   'player': 'D. Jones - MIA',
   'type': 'defensive',
   'outcome': None,
   'relations': None,
   'attributes': None},
  {'name': 'TryPt',
   'player': 'K. Olynyk - MIA',
   'type': '3-pt jump shot',
   'outcome': 'miss',
   'relations': None,
   'attributes': {'Dist': 26}},
  {'name': 'Rebound',
   'player': 'K. Irving - BOS',
   'type': 'defensive',
   'outcome': None,
   'relations': None,
   'attributes': None},
  {'name': 'TryPt',
   'player': 'J. Tatum - BOS',
   'type': '2-pt jump shot',
   'outcome': 'miss',
   'relations': None,
   'attributes': {'Dist': 20}},
  {'name': 'Rebound',
   'player': 'M. Smart - BOS',
   'type': 'offensive',
   'outcome': None,
   'relations': None,
   'attributes': None},
  {'name': 'TryPt',
   'play

In [45]:
# Every field-goal attempt ("TryPt") by J. Tatum in the sample game.
tatumshots = [
    event
    for event in game["events"]
    if event["name"] == "TryPt" and event["player"] == "J. Tatum - BOS"
]

In [46]:
# (made shots, attempted shots)
sum(1 for shot in tatumshots if shot['outcome'] == 'make'), len(tatumshots)

(7, 15)

In [47]:
# Made shots that carry a relation (an assist, in the output below).
[
    shot
    for shot in tatumshots
    if shot['relations'] is not None and shot['outcome'] == 'make'
]

[{'name': 'TryPt',
  'player': 'J. Tatum - BOS',
  'type': '2-pt dunk',
  'outcome': 'make',
  'relations': {'AssistedBy': 'A. Horford - BOS'},
  'attributes': {'Dist': 0}},
 {'name': 'TryPt',
  'player': 'J. Tatum - BOS',
  'type': '3-pt jump shot',
  'outcome': 'make',
  'relations': {'AssistedBy': 'M. Morris - BOS'},
  'attributes': {'Dist': 24}},
 {'name': 'TryPt',
  'player': 'J. Tatum - BOS',
  'type': '3-pt jump shot',
  'outcome': 'make',
  'relations': {'AssistedBy': 'G. Hayward - BOS'},
  'attributes': {'Dist': 23}},
 {'name': 'TryPt',
  'player': 'J. Tatum - BOS',
  'type': '3-pt jump shot',
  'outcome': 'make',
  'relations': {'AssistedBy': 'A. Horford - BOS'},
  'attributes': {'Dist': 25}},
 {'name': 'TryPt',
  'player': 'J. Tatum - BOS',
  'type': '2-pt jump shot',
  'outcome': 'make',
  'relations': {'AssistedBy': 'A. Horford - BOS'},
  'attributes': {'Dist': 15}}]

In [48]:
# All free-throw events of the sample game, to check the attached "Fouler" relation.
list(filter(lambda event: event["name"] == "FreeThrow", game["events"]))

[{'name': 'FreeThrow',
  'player': 'J. Tatum - BOS',
  'type': None,
  'outcome': 'make',
  'relations': {'Fouler': 'D. Waiters - MIA'},
  'attributes': None},
 {'name': 'FreeThrow',
  'player': 'J. Tatum - BOS',
  'type': None,
  'outcome': 'miss',
  'relations': {'Fouler': 'D. Waiters - MIA'},
  'attributes': None},
 {'name': 'FreeThrow',
  'player': 'J. Johnson - MIA',
  'type': None,
  'outcome': 'miss',
  'relations': {'Fouler': 'M. Morris - BOS'},
  'attributes': None},
 {'name': 'FreeThrow',
  'player': 'G. Hayward - BOS',
  'type': None,
  'outcome': 'make',
  'relations': {'Fouler': 'G. Dragic - MIA'},
  'attributes': None},
 {'name': 'FreeThrow',
  'player': 'G. Hayward - BOS',
  'type': None,
  'outcome': 'make',
  'relations': {'Fouler': 'G. Dragic - MIA'},
  'attributes': None},
 {'name': 'FreeThrow',
  'player': 'J. Johnson - MIA',
  'type': None,
  'outcome': 'make',
  'relations': {'Fouler': 'A. Baynes - BOS'},
  'attributes': None},
 {'name': 'FreeThrow',
  'player': '

In [49]:
# All substitution ("Enter") events of the sample game.
list(filter(lambda event: event["name"] == "Enter", game["events"]))

[{'name': 'Enter',
  'player': 'J. Johnson - MIA',
  'type': None,
  'outcome': None,
  'relations': {'Replace': 'B. Adebayo - MIA'},
  'attributes': None},
 {'name': 'Enter',
  'player': 'D. Wade - MIA',
  'type': None,
  'outcome': None,
  'relations': {'Replace': 'K. Olynyk - MIA'},
  'attributes': None},
 {'name': 'Enter',
  'player': 'H. Whiteside - MIA',
  'type': None,
  'outcome': None,
  'relations': {'Replace': 'D. Waiters - MIA'},
  'attributes': None},
 {'name': 'Enter',
  'player': 'G. Hayward - BOS',
  'type': None,
  'outcome': None,
  'relations': {'Replace': 'A. Horford - BOS'},
  'attributes': None},
 {'name': 'Enter',
  'player': 'M. Morris - BOS',
  'type': None,
  'outcome': None,
  'relations': {'Replace': 'M. Smart - BOS'},
  'attributes': None},
 {'name': 'Enter',
  'player': 'R. Anderson - MIA',
  'type': None,
  'outcome': None,
  'relations': {'Replace': 'D. Jones - MIA'},
  'attributes': None},
 {'name': 'Enter',
  'player': 'T. Rozier - BOS',
  'type': None

In [50]:
# Parse every game and write one JSON file per game under ./Data/Games/.
# The path is used as a string prefix, so the trailing "/" is required.
reader.extract_all(pbp_dict, "./Data/Games/")

Total Games: 1312
Parse 0th game
Saved 0th game
Parse 1th game
Saved 1th game
Parse 2th game
Saved 2th game
Parse 3th game
Saved 3th game
Parse 4th game
Saved 4th game
Parse 5th game
Saved 5th game
Parse 6th game
Saved 6th game
Parse 7th game
Saved 7th game
Parse 8th game
Saved 8th game
Parse 9th game
Saved 9th game
Parse 10th game
Saved 10th game
Parse 11th game
Saved 11th game
Parse 12th game
Saved 12th game
Parse 13th game
Saved 13th game
Parse 14th game
Saved 14th game
Parse 15th game
Saved 15th game
Parse 16th game
Saved 16th game
Parse 17th game
Saved 17th game
Parse 18th game
Saved 18th game
Parse 19th game
Saved 19th game
Parse 20th game
Saved 20th game
Parse 21th game
Saved 21th game
Parse 22th game
Saved 22th game
Parse 23th game
Saved 23th game
Parse 24th game
Saved 24th game
Parse 25th game
Saved 25th game
Parse 26th game
Saved 26th game
Parse 27th game
Saved 27th game
Parse 28th game
Saved 28th game
Parse 29th game
Saved 29th game
Parse 30th game
Saved 30th game
Parse 31th

Parse 248th game
Saved 248th game
Parse 249th game
Saved 249th game
Parse 250th game
Saved 250th game
Parse 251th game
Saved 251th game
Parse 252th game
Saved 252th game
Parse 253th game
Saved 253th game
Parse 254th game
Saved 254th game
Parse 255th game
Saved 255th game
Parse 256th game
Saved 256th game
Parse 257th game
Saved 257th game
Parse 258th game
Saved 258th game
Parse 259th game
Saved 259th game
Parse 260th game
Saved 260th game
Parse 261th game
Saved 261th game
Parse 262th game
Saved 262th game
Parse 263th game
Saved 263th game
Parse 264th game
Saved 264th game
Parse 265th game
Saved 265th game
Parse 266th game
Saved 266th game
Parse 267th game
Saved 267th game
Parse 268th game
Saved 268th game
Parse 269th game
Saved 269th game
Parse 270th game
Saved 270th game
Parse 271th game
Saved 271th game
Parse 272th game
Saved 272th game
Parse 273th game
Saved 273th game
Parse 274th game
Saved 274th game
Parse 275th game
Saved 275th game
Parse 276th game
Saved 276th game
Parse 277th ga

Saved 489th game
Parse 490th game
Saved 490th game
Parse 491th game
Saved 491th game
Parse 492th game
Saved 492th game
Parse 493th game
Saved 493th game
Parse 494th game
Saved 494th game
Parse 495th game
Saved 495th game
Parse 496th game
Saved 496th game
Parse 497th game
Saved 497th game
Parse 498th game
Saved 498th game
Parse 499th game
Saved 499th game
Parse 500th game
Saved 500th game
Parse 501th game
Saved 501th game
Parse 502th game
Saved 502th game
Parse 503th game
Saved 503th game
Parse 504th game
Saved 504th game
Parse 505th game
Saved 505th game
Parse 506th game
Saved 506th game
Parse 507th game
Saved 507th game
Parse 508th game
Saved 508th game
Parse 509th game
Saved 509th game
Parse 510th game
Saved 510th game
Parse 511th game
Saved 511th game
Parse 512th game
Saved 512th game
Parse 513th game
Saved 513th game
Parse 514th game
Saved 514th game
Parse 515th game
Saved 515th game
Parse 516th game
Saved 516th game
Parse 517th game
Saved 517th game
Parse 518th game
Saved 518th ga

Saved 730th game
Parse 731th game
Saved 731th game
Parse 732th game
Saved 732th game
Parse 733th game
Saved 733th game
Parse 734th game
Saved 734th game
Parse 735th game
Saved 735th game
Parse 736th game
Saved 736th game
Parse 737th game
Saved 737th game
Parse 738th game
Saved 738th game
Parse 739th game
Saved 739th game
Parse 740th game
Saved 740th game
Parse 741th game
Saved 741th game
Parse 742th game
Saved 742th game
Parse 743th game
Saved 743th game
Parse 744th game
Saved 744th game
Parse 745th game
Saved 745th game
Parse 746th game
Saved 746th game
Parse 747th game
Saved 747th game
Parse 748th game
Saved 748th game
Parse 749th game
Saved 749th game
Parse 750th game
Saved 750th game
Parse 751th game
Saved 751th game
Parse 752th game
Saved 752th game
Parse 753th game
Saved 753th game
Parse 754th game
Saved 754th game
Parse 755th game
Saved 755th game
Parse 756th game
Saved 756th game
Parse 757th game
Saved 757th game
Parse 758th game
Saved 758th game
Parse 759th game
Saved 759th ga

Saved 972th game
Parse 973th game
Saved 973th game
Parse 974th game
Saved 974th game
Parse 975th game
Saved 975th game
Parse 976th game
Saved 976th game
Parse 977th game
Saved 977th game
Parse 978th game
Saved 978th game
Parse 979th game
Saved 979th game
Parse 980th game
Saved 980th game
Parse 981th game
Saved 981th game
Parse 982th game
Saved 982th game
Parse 983th game
Saved 983th game
Parse 984th game
Saved 984th game
Parse 985th game
Saved 985th game
Parse 986th game
Saved 986th game
Parse 987th game
Saved 987th game
Parse 988th game
Saved 988th game
Parse 989th game
Saved 989th game
Parse 990th game
Saved 990th game
Parse 991th game
Saved 991th game
Parse 992th game
Saved 992th game
Parse 993th game
Saved 993th game
Parse 994th game
Saved 994th game
Parse 995th game
Saved 995th game
Parse 996th game
Saved 996th game
Parse 997th game
Saved 997th game
Parse 998th game
Saved 998th game
Parse 999th game
Saved 999th game
Parse 1000th game
Saved 1000th game
Parse 1001th game
Saved 1001t

Parse 1202th game
Saved 1202th game
Parse 1203th game
Saved 1203th game
Parse 1204th game
Saved 1204th game
Parse 1205th game
Saved 1205th game
Parse 1206th game
Saved 1206th game
Parse 1207th game
Saved 1207th game
Parse 1208th game
Saved 1208th game
Parse 1209th game
Saved 1209th game
Parse 1210th game
Saved 1210th game
Parse 1211th game
Saved 1211th game
Parse 1212th game
Saved 1212th game
Parse 1213th game
Saved 1213th game
Parse 1214th game
Saved 1214th game
Parse 1215th game
Saved 1215th game
Parse 1216th game
Saved 1216th game
Parse 1217th game
Saved 1217th game
Parse 1218th game
Saved 1218th game
Parse 1219th game
Saved 1219th game
Parse 1220th game
Saved 1220th game
Parse 1221th game
Saved 1221th game
Parse 1222th game
Saved 1222th game
Parse 1223th game
Saved 1223th game
Parse 1224th game
Saved 1224th game
Parse 1225th game
Saved 1225th game
Parse 1226th game
Saved 1226th game
Parse 1227th game
Saved 1227th game
Parse 1228th game
Saved 1228th game
Parse 1229th game
Saved 1229