In [33]:
import json
import os
import pandas as pd
from src.mtbl_globals import DIR_EXTRACT, DIR_TRANSFORM

## Load Player Data

In [34]:
# Per-position player tables, keyed by the position code taken from the
# file name ('mtbl_<pos>.json' -> '<pos>').
positions = {}
files = [
    'mtbl_c.json', 'mtbl_1b.json', 'mtbl_2b.json', 'mtbl_3b.json', 'mtbl_ss.json',
    'mtbl_of.json', 'mtbl_dh.json', 'mtbl_rp.json', 'mtbl_sp.json',
]
for filename in os.listdir(DIR_TRANSFORM):
    # Ignore anything in the directory that is not an expected position file.
    if filename not in files:
        continue

    # The player records sit under the top-level 'data' key of each file.
    with open(os.path.join(DIR_TRANSFORM, filename), 'rb') as fh:
        payload = json.loads(fh.read())
    records = payload['data']

    # Text between the first '_' and the following '.' is the position code.
    after_prefix = filename.partition('_')[2]
    pos = after_prefix.partition('.')[0]

    positions[pos] = pd.DataFrame(records)

## Load Rosters and Team Data

In [35]:
# Roster file: the parsed JSON is passed to json_normalize as-is (no
# schema wrapper); the resulting frame is indexed by 'abbrv'.
rosters_path = os.path.join(DIR_EXTRACT, 'lg_rosters.json')
with open(rosters_path, 'rb') as f:
    data = json.loads(f.read())
rosters = pd.json_normalize(data)
rosters = rosters.set_index('abbrv')

# Manager file: the records sit under the top-level 'data' key; the
# resulting frame is indexed by 'teamAbbreviation'.
managers_path = os.path.join(DIR_EXTRACT, 'lg_managers.json')
with open(managers_path, 'rb') as f:
    data = json.loads(f.read())['data']
teams = pd.DataFrame(data)
teams = teams.set_index('teamAbbreviation')

## Add Owner Column to Player Data

In [36]:
# Build the player-id -> team lookup once, instead of scanning the whole
# rosters frame for every player row.
# Null roster cells are dropped on purpose: DataFrame.isin treats NaN as
# equal to NaN, so a player with a missing ESPNID would otherwise be
# "matched" to the first team that has any empty roster cell.
# NOTE(review): assumes every non-null cell of `rosters` is a hashable
# scalar comparable with the 'ESPNID' values -- confirm against the
# lg_rosters.json layout.
owner_by_id = {}
for team, roster_row in rosters.iterrows():
    for player_id in roster_row.dropna():
        # setdefault keeps the first team (in rosters row order) that lists
        # the id, matching the previous "first matching row wins" behaviour.
        owner_by_id.setdefault(player_id, team)

for pos, df in positions.items():
    # Players not found on any roster are marked as free agents ('FA').
    df['owner'] = [owner_by_id.get(player_id, 'FA') for player_id in df['ESPNID']]

    # Update the position dataframe in the positions dictionary
    positions[pos] = df

In [37]:
# Preview the first five rows of the 'rp' position table.
rps = positions['rp']
rps.head(n=5)

Unnamed: 0,ESPNID,FANGRAPHSID,MLBID,name,team,positions,proj_G,proj_GS,proj_IP,proj_SVHD,...,z_proj_WHIP,z_proj_K/9,z_proj_SVHD,z_total,IP_shekels,ERA_shekels,WHIP_shekels,K/9_shekels,SVHD_shekels,shekels
0,4730225,31757,695243,Mason Miller,OAK,"[SP, RP]",23,0,38.3,19,...,3.012608,2.8852,1.965238,13.897244,2.700179,4.79556,4.265587,7.998159,3.539093,23.298578
1,32760,14212,623352,Josh Hader,HOU,[RP],20,0,33.1,21,...,2.625339,2.62374,2.108261,11.757661,1.833066,3.771257,3.717249,7.273357,3.796656,20.391585
2,40939,20373,662253,Andres Munoz,SEA,[RP],20,0,32.6,19,...,2.625339,2.016173,1.965238,10.807839,1.358656,4.389717,3.717249,5.589101,3.539093,18.593816
3,41743,21032,661403,Emmanuel Clase,CLE,[RP],21,0,35.5,23,...,2.625339,0.0,2.237825,10.165644,2.353573,4.252708,3.717249,0.0,4.02998,14.35351
4,39909,18138,664854,Ryan Helsley,STL,[RP],22,0,35.8,22,...,2.235405,0.708959,2.174491,9.970294,2.39726,3.399213,3.165136,1.965329,3.915926,14.842865


In [38]:
# Preview the first five rows of the 'ss' position table.
ss = positions['ss']
ss.head(n=5)

Unnamed: 0,ESPNID,FANGRAPHSID,MLBID,name,team,positions,proj_G,proj_PA,proj_AB,proj_H,...,z_proj_SLG,z_total,pri_pos,R_shekels,HR_shekels,RBI_shekels,SBN_shekels,OBP_shekels,SLG_shekels,shekels
0,42403,25764,677951,Bobby Witt Jr.,KC,[SS],97,428,391,58,...,2.192948,10.342932,SS,6.978231,7.709632,3.204611,10.462874,5.474879,6.678136,40.508362
1,32691,13624,608369,Corey Seager,TEX,[SS],88,385,338,50,...,2.201143,7.781055,SS,3.580818,7.709632,2.974566,0.289526,6.634477,6.703091,27.89211
2,32129,12916,596019,Francisco Lindor,NYM,[SS],99,431,383,51,...,1.333119,7.183198,SS,4.500747,6.007183,2.324471,7.16391,4.208761,4.059719,28.264791
3,4917694,26668,682829,Elly De La Cruz,CIN,"[SS, 3B]",94,409,366,46,...,0.966768,6.340667,SS,4.206975,2.883537,1.491304,11.037381,2.883324,2.944076,25.446597
4,33675,15986,642715,Willy Adames,MIL,[SS],98,419,372,47,...,1.383731,5.814752,SS,2.512202,6.007183,2.974566,1.750484,3.876575,4.213846,21.334856
