In [1]:
import importlib
import statsapi
import database; importlib.reload(database)

<module 'database' from 'c:\\Users\\Walter\\Desktop\\projects\\moneyball\\data\\database.py'>

### Creating a new database and populating it with a season

In [6]:
# When this cell is re-run, close what the previous `db` still holds open
# (its `con` attribute) before replacing it with a fresh database.
if 'db' in locals():
    db.con.close()

db = database.make_fresh_database()

In [7]:
# Populate the fresh database with the 2018 season. Judging by the recorded
# cell output, this fetches the game schedule and the team rosters, so it
# presumably needs network access -- TODO confirm against database.py.
database.collect_season_data(db, 2018)

Fetching game schedule for 2018 ...
Fetching team roster 30/30 ...


In [8]:
# Look up the teams recorded for the 2018 season (kept for later inspection;
# nothing else in this cell reads it).
season_teams = database.query_teams_in_season(db, 2018)

# Merge every team's roster into a single de-duplicated set of player ids.
player_ids = set().union(*database.query_team_rosters(db, 2018).values())

# Refresh the stored data for each player that appeared on a 2018 roster.
database.update_players_data(db, player_ids)

Fetching player stats 1379/1379 ...


---

### Determine player stat keys

In [2]:
# Use a single team's 2018 roster as a sample of players whose career stat
# payloads are inspected in the cells below.
team_id = 118
res = statsapi.get(
    'team_roster',
    {'teamId': team_id, 'season': 2018},
)

# Each roster entry carries the player id under its 'person' record.
players = [entry['person']['id'] for entry in res['roster']]

# One career-stats response per player (one API request each).
player_ress = [
    statsapi.player_stat_data(player_id, type="career")
    for player_id in players
]

In [3]:
# Maps player id -> {stat group name -> stats dict} for every fetched player.
player_stats = {}
for player_res in player_ress:
    # Start (or restart) this player's mapping of group name -> stats.
    groups_for_player = {}
    player_stats[player_res['id']] = groups_for_player

    for stats_entry in player_res['stats']:
        group_name = stats_entry['group']
        # Shallow copy so removing 'position' below leaves the response intact.
        group_stats = stats_entry['stats'].copy()

        if group_name.lower() == 'fielding':
            # Fielding stats are reported once per position: move the position
            # out of the stats and into the group name as a zero-based suffix.
            position = group_stats.pop('position')
            position_index = int(position['code']) - 1
            group_name = f"{group_name}_{position_index}"

        groups_for_player[group_name] = group_stats

In [4]:
# Record the set of stat keys for every stat group and report each player
# whose keys for a group differ from the first set recorded for that group.
#
# The first player that has a given group defines its reference key set, so no
# separate seeding pass over player_ids[0] is needed; this also means an empty
# `player_stats` yields an empty result instead of raising IndexError.
stat_group_keys = dict()
player_ids = list(player_stats.keys())
for player_id in player_ids:
    for group_name, stat_group in player_stats[player_id].items():
        group_keys = set(stat_group.keys())
        if group_name not in stat_group_keys:
            # First sighting of this group: its keys become the reference.
            stat_group_keys[group_name] = group_keys
        elif group_keys != stat_group_keys[group_name]:
            # Diagnostic only: the reference set is deliberately left
            # unchanged, so later players are still compared against the
            # first-seen keys.
            print(f"Group Keys did not match {player_id}, {group_name}")
            print(group_keys)
            print(stat_group_keys[group_name])
            print("\n--------------------------------------------")

In [5]:
import json

# Build the definition before opening the file: opening with 'w' truncates it
# immediately, so a failure while assembling the data would otherwise leave an
# empty or partial definition file behind.
#
# Both the group names and each group's keys are sorted. The keys come from
# sets, whose iteration order for strings can change between kernel sessions,
# so sorting keeps the written file stable from run to run.
stat_groups = sorted(stat_group_keys)
player_datadef = {
    stat_group: sorted(stat_group_keys[stat_group])
    for stat_group in stat_groups
}

with open(database.PLAYER_DATADEF_FILE, 'w') as file:
    json.dump(player_datadef, file, indent=4)