In [2]:
import pickle
import json

In [86]:
# Per-position text that is removed from a table's first line when the
# table's key is derived (the remainder is then lower-cased with spaces
# stripped). Note the leading space in every value.
position_replace_subkey_mapping = {
    "QB": " PASSING RUSHING",
    "RB": " RUSHING RECEIVING FUMBLES",
    "WR": " RECEIVING RUSHING FUMBLES",
    "TE": " RECEIVING RUSHING FUMBLES",
}

# Field names for each section of a player's record. They are applied by
# position (index) to the section's entries, so the order here must match the
# order of the underlying data.
dict_keys = {
    "info": ["team", "number", "position"],
    "postseason": ["date", "vs_#", "opponent", "outcome", "score", "stats", "game_title"],
    "2021regularseason": ["date", "vs_#", "opponent", "outcome", "score", "stats"]
}

# Ordered stat column names per position, grouped by column set.
# create_stats_dict walks the column sets and their columns in exactly this
# order when labelling the values of a stat line, so both the dict order and
# the list order are significant.
# NOTE(review): WR/TE "rushing" lists LNG before TD, whereas QB/RB list TD
# before LNG — presumably this mirrors the source tables; confirm.
position_stats_columns = {
    "QB": {
        "passing": ['CMP', 'ATT', 'YDS', 'CMP%', 'AVG', 'TD', 'INT', 'LNG', 'SACK', 'RTG', 'QBR'],
        "rushing": ['ATT', 'YDS', 'AVG', 'TD', 'LNG']
    },
    "RB": {
        "rushing": ['ATT', 'YDS', 'AVG', 'TD', 'LNG'],
        "receiving": ['REC', 'TGTS', 'YDS', 'AVG', 'TD', 'LNG'],
        "fumbles": [ 'FUM', 'LST', 'FF', 'KB']
    },
    "WR": {
        "receiving": ['REC', 'TGTS', 'YDS', 'AVG', 'TD', 'LNG'],
        "rushing": ['ATT', 'YDS', 'AVG', 'LNG', 'TD'],
        "fumbles": ['FUM', 'LST', 'FF', 'KB']
    },
    "TE": {
        "receiving": ['REC', 'TGTS', 'YDS', 'AVG', 'TD', 'LNG'],
        "rushing": ['ATT', 'YDS', 'AVG', 'LNG', 'TD'],
        "fumbles": ['FUM', 'LST', 'FF', 'KB']
    }
}

In [163]:
def create_stats_dict(stats_list, columns):
    """Label each whitespace-separated stat line with its column names.

    Args:
        stats_list: Iterable of strings, one per game. Each string holds the
            stat values separated by whitespace, in the same order as the
            columns described by ``columns``.
        columns: Mapping of column-set name to the ordered list of column
            names in that set. The mapping's iteration order defines the order
            in which values are consumed from each line.

    Returns:
        A list with one dict per input line. Keys have the form
        ``"<col_set>_<col>"`` and values are the raw (string) stat values.
        Values beyond the expected number of columns are ignored.

    Raises:
        IndexError: If a line contains fewer values than there are columns.
    """
    # The flattened label list is identical for every line, so build it once.
    labels = [
        f"{col_set}_{col}"
        for col_set, column_names in columns.items()
        for col in column_names
    ]

    stats_with_col_names = []
    for stats in stats_list:
        # split() without an argument collapses runs of whitespace, so a stray
        # leading or doubled space cannot shift values into the wrong column
        # (split(" ") would yield empty tokens in that case).
        values = stats.split()
        if len(values) < len(labels):
            raise IndexError(
                f"expected at least {len(labels)} stat values, "
                f"got {len(values)}: {stats!r}"
            )
        # zip stops at the shorter sequence, i.e. extra trailing values are dropped.
        stats_with_col_names.append(dict(zip(labels, values)))
    return stats_with_col_names

In [161]:
# Load the raw per-player tables.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load data/game_level.pkl if it comes from a trusted source.
with open('data/game_level.pkl', 'rb') as f:
    player_data = pickle.load(f)

# Every table of every player is a newline-delimited string; split each into its lines.
for i, player in enumerate(player_data):
    player_data[i] = [table.split("\n") for table in player]

# Key each player by the first line of their first table; the remaining
# tables (info first, then the game tables) become the value.
player_data_dict = {player[0][0]: player[1:] for player in player_data}

# Replace each player's list of tables by a dict: "info" plus one entry per
# game table, keyed by the table's first line with the position-specific
# suffix removed, spaces stripped and lower-cased.
for key, value in player_data_dict.items():
    info = value[0]
    position = info[-1]
    player_data_dict[key] = {"info": info}

    for table in value[1:]:
        subkey = table[0].replace(position_replace_subkey_mapping[position], "")
        subkey = subkey.replace(" ", "").lower()
        player_data_dict[key][subkey] = table


# Regroup the flat list of lines of each game table into one list per field.
for player, info in player_data_dict.items():
    for key, data in info.items():
        if key == 'info':
            continue

        # Number of lines per game equals the number of field names declared
        # for this table (7 for "postseason", 6 for "2021regularseason").
        # Looking it up here makes an unexpected table key fail immediately
        # with a KeyError instead of silently reusing a stale value.
        mod = len(dict_keys[key])

        last_entry = data[-1].split(" ")[0]
        # Skip the first five lines of the table.
        # NOTE(review): presumably the title and column-header lines — confirm.
        game_data = data[5:]
        # Drop a trailing line that starts with "POSTSEASON" or "REGULAR"
        # (presumably a totals row — confirm); it is not a game field.
        if last_entry == "POSTSEASON" or last_entry == "REGULAR":
            game_data = game_data[:-1]

        # Line i belongs to field i % mod, i.e. every mod-th line starting at
        # the field's offset.
        game_groups = [game_data[offset::mod] for offset in range(mod)]
        player_data_dict[player][key] = game_groups


# Attach field names to every section: entry i of a section gets name
# dict_keys[section][i].
for player, data in player_data_dict.items():
    for key, table in data.items():
        player_data_dict[player][key] = {dict_keys[key][i]: item for i, item in enumerate(table)}

# Expand each game's raw stat line into a dict of labelled values.
for player, data in player_data_dict.items():
    # Read the position directly rather than relying on 'info' being the
    # first key visited by the inner loop.
    pos = data['info']['position']
    for key, table in data.items():
        if key == "postseason" or key == "2021regularseason":
            stats = create_stats_dict(table['stats'], position_stats_columns[pos])
            player_data_dict[player][key]['stats'] = stats

# Persist the result; the context manager guarantees the file is flushed and closed.
with open("data/player_data_dict.json", "w") as out_file:
    json.dump(player_data_dict, out_file, indent=4)