In [1]:
from collections import defaultdict
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:

# Folder that holds the per-game stat CSVs read by the rest of the notebook.
game_csvs_folderpath = Path('game_csvs_na')

# Path.glob() yields entries in an arbitrary, filesystem-dependent order.
# Sorting makes the game order (and therefore the game_dfs[0] preview and the
# per-player value lists built later) identical on every run and machine.
game_csvs_filepaths = sorted(game_csvs_folderpath.glob("*.csv"))

print(f"Found {len(game_csvs_filepaths)} csvs.")


Found 107 csvs.


In [3]:
# Read each CSV into its own DataFrame. The first CSV column is used as the
# index, so (as the preview below shows) rows are stats and columns are players.
game_dfs = []
for filepath in game_csvs_filepaths:
    game_dfs.append(pd.read_csv(filepath, index_col=0))

# Preview the first game's table as a sanity check.
print(game_dfs[0])

                                            Allushin           Fireburner  \
goals                                              0                    1   
assists                                            0                    0   
shots                                              4                    1   
saves                                              1                    1   
demos                                              0                    4   
kickoff_goals                                      0                    0   
possession_duration                47.53009700557649   37.189610191317826   
boost_used                        2204.4734921863856   2587.7300036601396   
boost_per_minute                   420.0442592779991    493.0706295410405   
wasted_usage_percentage           0.1318221288220894  0.14013060779515585   
num_large_boosts                  20.006884809690835   21.912302410613773   
num_small_boosts                   86.69650084199361    91.46004484430097   

In [10]:
# Collect every distinct column label (player name) seen in any game, in
# alphabetical order.
# NOTE(review): the recorded output lists names such as 'JWismont' and 'jstn.',
# and the execution counts show this cell ran after the alias-normalisation
# cell below it. On a fresh top-to-bottom run it would presumably report the
# raw, un-normalised names instead -- confirm the intended cell order.
player_names = sorted({name for df in game_dfs for name in df.columns})

print(f"Found {len(player_names)} players.")
print(player_names)

Found 24 players.
['Allushin', 'AxB', 'AyyJayy', 'Chicago', 'CorruptedG', 'Drippay', 'DudeWithTheNose', 'Fireburner', 'GarrettG', 'Gimmick', 'JKnaps', 'JWismont', 'KLASSUX', 'Karma', 'Kronovi ^-^', 'Lethamyr', 'Memory', 'Rizzo', 'Satthew', 'Squishy', 'Sypical', 'Torment', 'Wonder', 'jstn.']


In [9]:
# Lookup from a name as it appears in a CSV header to the name that should be
# used for that player everywhere else in the notebook.
player_names_transform = {
    'Tad': 'Tadpole',
    'gimmick': 'Gimmick',
    'justin': 'jstn.',
    'JWis': 'JWismont',
    'klassux': 'KLASSUX',
    'Nose God': 'DudeWithTheNose'
}

# Relabel the columns of every game table in place. Names that have no entry
# in the lookup are kept unchanged.
for df in game_dfs:
    renamed_columns = []
    for name in df.columns:
        renamed_columns.append(player_names_transform.get(name, name))
    df.columns = renamed_columns


In [11]:
# Regroup the per-game tables by stat:
#   player_stats[stat_name][player_name] -> list of values, with one entry
#   appended for every game table that has a column for that player.
player_stats = defaultdict(lambda: defaultdict(list))

for df in game_dfs:
    # iterrows() yields (index label, row Series); the index label is the stat
    # name and the row holds that stat's value for each player column.
    for stat_name, player_values in df.iterrows():
        stat_dict = player_stats[stat_name]
        # Series.items() is used instead of Series.iteritems(): the latter was
        # deprecated in pandas 1.5 and removed in pandas 2.0.
        for player, stat_value in player_values.items():
            # Cells holding the text "True"/"False" become real booleans;
            # everything else is coerced to float.
            if stat_value == "False":
                value = False
            elif stat_value == "True":
                value = True
            else:
                value = float(stat_value)
            stat_dict[player].append(value)


In [12]:
# For every stat, draw one horizontal violin per player (players ordered by
# descending mean value) and save the figure as stat_distributions/<stat>.png.

# savefig() does not create missing directories, so make sure the target exists.
stat_plots_folderpath = Path("stat_distributions")
stat_plots_folderpath.mkdir(parents=True, exist_ok=True)

for stat in player_stats.keys():
    stats = player_stats[stat]  # {player: [values]}
    print(stat)

    # Sort the (player, values) pairs ONCE so labels and data cannot drift
    # apart. Sorting two separate zips breaks ties differently (by name vs. by
    # value list) and can mislabel violins when averages are equal.
    # Negating the mean puts the highest average first (top of the plot);
    # ties fall back to the player name.
    ranked = sorted(
        stats.items(),
        key=lambda item: (-sum(item[1]) / len(item[1]), item[0]),
    )
    players = [player for player, _ in ranked]
    data = [values for _, values in ranked]

    # Create the axes explicitly and hand them to seaborn; `fig` is not a
    # violinplot() parameter.
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.violinplot(data=data, inner='points', orient='h', ax=ax)
    sns.despine(ax=ax)

    # Pin the tick positions before labelling them with the player names.
    ax.set_yticks(range(len(players)))
    ax.set_yticklabels(players)
    stat_name = stat.replace("_", " ").title()
    ax.set(xlabel=stat_name)

    fig.tight_layout()
    fig.savefig(stat_plots_folderpath / f"{stat}.png", dpi=300)
    # Close this specific figure so figures do not pile up in memory.
    plt.close(fig)




goals
assists
shots
saves
demos
kickoff_goals
possession_duration
boost_used
boost_per_minute
wasted_usage_percentage
num_large_boosts
num_small_boosts
boost_ratio
stolen_boosts
time_full_boost
time_low_boost
time_no_boost
average_boost_level
average_speed
time_high_in_air
time_in_air
time_on_ground
time_at_slow_speed
time_at_boost_speed
time_at_super_sonic
time_in_attacking_half
time_in_defending_half
time_in_attacking_third
time_in_neutral_third
time_in_defending_third
time_on_wall
average_hit_distance
ball_hit_forward
ball_hit_forward_per_hit
time_close_to_ball
time_closest_to_ball
time_behind_center_of_mass
time_in_front_of_center_of_mass
time_most_back_player
time_between_players
time_most_forward_player
count_of_possessions
average_duration_of_possessions
hits_per_possession
shots_per_possession
goals_per_possession
saves_per_possession
passes_per_possession
aerials_per_possession
hits
aerials
aerial_efficiency
passes
dribbles
hit_goals
hit_shots
hit_saves
turnovers
turnovers_att