# WNBL Plotting

Demonstration of loading multiple CSV files and plotting various features.  

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import wnbl

# Directory holding the CSV inputs, relative to the working directory.
# Built with os.path.join so the separator is correct on every platform
# (the value is still ".\\data" on Windows).
data_path = os.path.join(".", "data")

In [None]:
def boxplot_sorted_by_position(df, by, column, position, rot=0):
    """Draw horizontal box plots of ``column`` per group, ordered by standings.

    Args:
        df: DataFrame holding the values to plot.
        by: Column name (or list of names) passed to ``df.groupby``.
        column: Name of the column in ``df`` whose distribution is plotted.
        position: DataFrame with a ``'Position'`` column and a ``'Team'``
            column; the ``'Team'`` values must match the group names of ``df``.
            Any index is accepted.
        rot: Tick-label rotation, forwarded to ``DataFrame.boxplot``.

    Returns:
        The axes returned by ``DataFrame.boxplot`` so the caller can adjust
        labels, title and figure size.

    Raises:
        KeyError: if a team listed in ``position`` has no group in ``df``.
    """
    # Build one column per group. Grouping by a one-element list yields
    # one-element tuple keys on pandas >= 2.0; unwrap them so the columns
    # (and therefore the tick labels) are the plain group names.
    grouped = {}
    for key, vals in df.groupby(by):
        if isinstance(key, tuple) and len(key) == 1:
            key = key[0]
        grouped[key] = vals[column]
    df2 = pd.DataFrame(grouped)

    # Order teams by descending position value. Look the team names up by
    # index *label* (.loc), not by integer location, so the result is correct
    # even when ``position`` does not carry a default 0..n-1 index.
    pos = position['Position'].sort_values(ascending=False)
    colnames = position.loc[pos.index, 'Team'].tolist()

    # return axes so changes can be made outside the function
    return df2[colnames].boxplot(rot=rot, return_type="axes", vert=False)

In [None]:
def boxplot_sorted_by_median(df, by, column, rot=0):
    """Draw horizontal box plots of ``column`` per group, ordered by median.

    Args:
        df: DataFrame holding the values to plot.
        by: Column name (or list of names) passed to ``df.groupby``.
        column: Name of the column in ``df`` whose distribution is plotted.
        rot: Tick-label rotation, forwarded to ``DataFrame.boxplot``.

    Returns:
        The axes returned by ``DataFrame.boxplot`` so the caller can adjust
        labels, title and figure size.
    """
    # Build one column per group. Grouping by a one-element list yields
    # one-element tuple keys on pandas >= 2.0; unwrap them so the columns
    # (and therefore the tick labels) are the plain group names.
    grouped = {}
    for key, vals in df.groupby(by):
        if isinstance(key, tuple) and len(key) == 1:
            key = key[0]
        grouped[key] = vals[column]
    df2 = pd.DataFrame(grouped)

    # Column order = ascending median of each group.
    meds = df2.median().sort_values()

    # return axes so changes can be made outside the function
    return df2[meds.index].boxplot(rot=rot, return_type="axes", vert=False)

In [None]:
# Load the teams table through the project's wnbl.load_csv helper.
# Intent (per the original note): concatenate the per-season team files and
# drop duplicates so each TeamID maps to one team name. The helper's code is
# not visible here - TODO confirm it performs the de-duplication.
teams_df = wnbl.load_csv(data_path,'teams')
# Print columns, dtypes and non-null counts as a quick sanity check.
teams_df.info()

In [None]:
# Load the player statistics for all seasons into one frame.
# include_id=True presumably adds an identifier column taken from each source
# file (later cells rely on a 'CompID' column) - TODO confirm in wnbl.load_csv.
players_df = wnbl.load_csv(data_path,'players',include_id=True)
# Print columns, dtypes and non-null counts as a quick sanity check.
players_df.info()

In [None]:
# Load the standings of every season into a single frame.
standings_df = wnbl.load_csv(data_path, 'standings', include_id=True)

# The second column (the logo) is dropped; it is reported to hold only NaN.
standings_df = standings_df.drop(columns=standings_df.columns[1])
standings_df.info()

In [None]:
# Load the seasons table, which maps each year to its competition (CompID).
# NOTE(review): this call passes 'seasons.csv' (with extension) whereas the
# other load_csv calls pass a bare name such as 'teams' - confirm that
# wnbl.load_csv accepts both forms.
# seasons_df = pd.read_csv(os.path.join(data_path,'seasons.csv'))
seasons_df = wnbl.load_csv(data_path,'seasons.csv')

# Treat the largest CompID as the current season's competition.
comp_id = seasons_df['CompID'].max()

In [None]:
# Print columns, dtypes and non-null counts of the seasons table.
seasons_df.info()

In [None]:
# Attach the season columns to every player row (matched on CompID) so the
# statistics can be plotted against the season date; the left join keeps
# player rows that have no matching season.
players_df = pd.merge(players_df, seasons_df, how='left', on='CompID')

In [None]:
# Attach the team columns to every player row (matched on TeamID); the left
# join keeps player rows whose TeamID is missing from teams_df.
players_df = pd.merge(players_df, teams_df, how='left', on='TeamID')
# No 'Team_y' -> 'Team' rename is applied here (left disabled):
# players_df = players_df.rename(columns={'Team_y':'Team'})

In [None]:
# Inspect the merged player frame (columns, dtypes, non-null counts).
players_df.info()

In [None]:
# Give each standings row its team columns from teams_df (left join on
# TeamID), then expose the 'Team_y' column produced by the merge as 'Team'.
standings_df = (
    standings_df
    .merge(teams_df, on='TeamID', how='left')
    .rename(columns={'Team_y': 'Team'})
)

In [None]:
# Derived features. `da` is an alias of players_df (not a copy), so the new
# columns are added to players_df as well.
da = players_df

# Efficiency: positive contributions minus missed shots and turnovers.
# NOTE(review): if FGA/FGM already include three-point attempts, missed
# threes are subtracted twice - confirm against the data source.
da['EFF'] = (
    da['PTS'] + da['REB'] + da['AST'] + da['STL'] + da['BLK']
    - (da['FGA'] - da['FGM'])
    - (da['3PA'] - da['3PM'])
    - (da['FTA'] - da['FTM'])
    - da['TO']
)

# Efficiency per game played.
da['EPG'] = da['EFF'].div(da['G'])

In [None]:
# Keep only the current competition's rows, reduced to the two columns the
# box plots need.
is_current_comp = da['CompID'] == comp_id
tmp = da.loc[is_current_comp, ['Team', 'EPG']]
tmp.info()

In [None]:
# identify outliers, filter by current comp

# team_q1 = tmp.groupby('Team').quantile(0.25)['EPG'].to_numpy()
# team_q3 = tmp.groupby('Team').quantile(0.75)['EPG'].to_numpy()
# outlier_top_lim = team_q3 + 1.5 * (team_q3 - team_q1)
# outlier_bottom_lim = team_q1 - 1.5 * (team_q3 - team_q1)

In [None]:
# Box plot of per-player EPG for each team, ordered by league standings.

# Restrict the standings to the same competition as the statistics.
# reset_index() gives the filtered rows a fresh 0..n-1 index (the previous
# index is kept as an 'index' column).
dfs = standings_df[standings_df['CompID']==comp_id].reset_index()

ax = boxplot_sorted_by_position(tmp, by=['Team'], column="EPG",position=dfs)
# NOTE: the title text is hard-coded for the 2020 season, while the data are
# selected by comp_id.
ax.set(xlabel='EPG',ylabel='Team',title='WNBL 2020 Distribution of Player Efficiency Per Game\nOrdered by Standings Dec 1st 2020')

# Resize the figure first, then write it to disk with a tight bounding box.
ax.figure.set_size_inches(10,5)
ax.figure.savefig('wnbl20_team_standings_boxplot.png',bbox_inches='tight')

In [None]:
# Box plot of per-player EPG for each team, ordered by the team's median EPG.
ax = boxplot_sorted_by_median(tmp, by=['Team'], column="EPG")
# NOTE: the title text is hard-coded for the 2020 season.
ax.set(xlabel='EPG',ylabel='Team',title='WNBL 2020 Distribution of Player Efficiency Per Game\nOrdered by Decreasing Team EPG Median')

# Resize the figure first, then write it to disk with a tight bounding box.
ax.figure.set_size_inches(10,5)
ax.figure.savefig('wnbl20_team_median_boxplot.png',bbox_inches='tight')

In [None]:

# multiline plot
# tmp =da[['Player','Year','EPG']]
# fig, ax = plt.subplots(figsize=(8,6))
# for label, df in tmp.sort_values('Year').groupby('Player'):
#     df.plot(x='Year',y='EPG', ax=ax, label=label)
# plt.legend(loc='upper left')

In [None]:
# Identify the top player (highest EPG) of each team in the current season.
tmp = da[['Player','Team','Year','EPG','CompID','TeamID']]
tmp = tmp[tmp['CompID']==comp_id]

# Sort ascending within each team and take the last row per team.
# na_position='first' keeps rows with a missing EPG (e.g. 0/0 for a player
# with no games) away from the tail, so they are never picked as the "top"
# player unless the team has no valid EPG at all.
top_players = (
    tmp.sort_values(['TeamID', 'EPG'], axis=0, na_position='first')
    .groupby('TeamID')
    .tail(1)['Player']
)

# Current team of each of these top players.
current_teams = tmp[tmp['Player'].isin(top_players)][['Player','Team']]


In [None]:
# All-season rows of the top players, reduced to the columns needed below.
tmp = da.loc[da['Player'].isin(top_players), ['Player', 'Team', 'Year', 'EPG']]

# Bring in each player's current team. Both frames have a 'Team' column, so
# the merge yields 'Team_x' (from tmp) and 'Team_y' (from current_teams).
tmp = tmp.merge(current_teams, on='Player')

# Display label: "<player> (<current team>) ".
tmp['Player-Team'] = tmp['Player'] + ' (' + tmp['Team_y'] + ') '


In [None]:
# Facet plot of each top player's historical EPG (one panel per player).
import seaborn as sns

# All-season rows of the top players.
tmp = da[['Player','Team','Year','EPG']]
tmp = tmp[tmp['Player'].isin(top_players)]
# Both frames have a 'Team' column, so the merge yields 'Team_x' (from tmp)
# and 'Team_y' (from current_teams, i.e. the current team).
tmp = tmp.merge(current_teams,on='Player')
# Panel label: player name with the current team on a second line.
tmp['Player-Team'] = tmp['Player'] + '\n(' + tmp['Team_y'] + ') '

# Create the grid: one facet per player, four facets per row.
g = sns.FacetGrid(tmp.sort_values('Year'), col='Player-Team', hue='Player-Team', col_wrap=4)

# Draw the EPG line in every facet.
g = g.map(plt.plot, 'Year', 'EPG')

# Shade the area under the line.
g = g.map(plt.fill_between, 'Year', 'EPG', alpha=0.2)

# Title each facet with its Player-Team label. (The earlier, immediately
# overwritten "{col_name} Player-Team" title call has been removed.)
g = g.set_titles("{col_name}")

# Leave room at the top, then add a title for the whole figure.
plt.subplots_adjust(top=0.85)
# NOTE: this changes seaborn's global defaults; the grid above already exists
# at this point, so it is kept only for its effect on later plots.
sns.set(rc={'figure.figsize':(12,8)})
# Do not rebind `g` here, so it keeps referring to the FacetGrid.
g.fig.suptitle('Historical Performance of Top WNBL Players Per Team in 2020')

# Attribution text, placed in data coordinates of the current (last) axes.
plt.text(2020,-13,"linkedin.com/in/evassiliadis",ha='right',alpha=0.5)
plt.text(2020,-16,"github.com/vass1138/wnbl",ha='right',alpha=0.5)
plt.savefig('wnbl_top_players_history.png')
plt.show()