In [80]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns

In [81]:
# Load the raw season table and give the terse stat headers descriptive names.
# 'EV.1' / 'PP.1' / 'SH.1' are the second occurrence of those headers in the
# CSV and are mapped to the *_assists names below.
nhl = pd.read_csv('nhl.csv', encoding='latin-1')
nhl = nhl.rename(
    columns={
        'G': 'total_goals',
        'A': 'total_assists',
        'PTS': 'total_points',
        'EV': 'even_goals',
        'PP': 'pp_goals',
        'SH': 'sh_goals',
        'GW': 'gw_goals',
        'EV.1': 'even_assists',
        'PP.1': 'pp_assists',
        'SH.1': 'sh_assists',
        'S': 'shots',
    }
)

# Raw columns carried into feature engineering.
# 'sh_goals' and 'sh_assists' are renamed above but not selected here.
wrangling_columns = [
    'Player',
    'GP',
    'PIM',
    'PS',
    'total_goals',
    'total_points',
    'even_goals',
    'pp_goals',
    'gw_goals',
    'even_assists',
    'pp_assists',
    'shots',
    'TOI',
    'BLK',
    'HIT',
    'FOW',
    'FOL',
    'Season',
]

# Derived columns (per-game rates and percentages) kept for modelling,
# together with the 'Player' and 'Season' identifiers.
predictor_columns = [
    'Player',
    'pim_gp',
    'even_g_gp',
    'pp_g_gp',
    'gw_g_gp',
    'even_a_gp',
    'pp_a_gp',
    'sh_gp',
    'sh_perc',
    'toi_gp',
    'blocks_gp',
    'hits_gp',
    'fow_gp',
    'fow_perc',
    'Season',
]

# Take an explicit copy so nhl_cleaned is an independent frame rather than a
# slice of nhl; assigning new columns to it then cannot raise
# SettingWithCopyWarning or silently write to a temporary object.
nhl_cleaned = nhl[wrangling_columns].copy()
nhl_cleaned.columns

Index(['Player', 'GP', 'PIM', 'PS', 'total_goals', 'total_points',
       'even_goals', 'pp_goals', 'gw_goals', 'even_assists', 'pp_assists',
       'shots', 'TOI', 'BLK', 'HIT', 'FOW', 'FOL', 'Season'],
      dtype='object')

In [82]:
# Empty frame with the predictor schema. Not referenced in the visible part of
# the notebook; retained in case a later cell uses it.
cleaned_df = pd.DataFrame(columns=predictor_columns)

# Unique player names in a deterministic (sorted) order so that any loop over
# this list visits players identically on every run. Missing names are dropped.
player_list = sorted(nhl['Player'].dropna().unique())

# Build the features from the raw selection instead of mutating nhl_cleaned in
# place: the cell can then be re-run without a KeyError on the raw columns and
# without SettingWithCopyWarning.
season_totals = nhl[wrangling_columns]
games = season_totals['GP']
face_offs = season_totals['FOW'] + season_totals['FOL']

per_game = season_totals.assign(
    pim_gp=season_totals['PIM'] / games,
    even_g_gp=season_totals['even_goals'] / games,
    pp_g_gp=season_totals['pp_goals'] / games,
    gw_g_gp=season_totals['gw_goals'] / games,
    even_a_gp=season_totals['even_assists'] / games,
    pp_a_gp=season_totals['pp_assists'] / games,
    sh_gp=season_totals['shots'] / games,                       # shots per game
    sh_perc=season_totals['total_goals'] / season_totals['shots'],  # goals per shot
    toi_gp=season_totals['TOI'] / games,
    blocks_gp=season_totals['BLK'] / games,
    hits_gp=season_totals['HIT'] / games,
    fow_gp=season_totals['FOW'] / games,
    fow_perc=season_totals['FOW'] / face_offs,                  # share of face-offs won
)

# A zero denominator yields +/-inf (x/0) or NaN (0/0). fillna alone leaves the
# infinities in place, so convert them to NaN first and then zero-fill.
nhl_cleaned = (
    per_game[predictor_columns]
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

In [169]:
num_simulations = 1000
SEED = 42  # fixed seed so Restart & Run All reproduces the same quartiles
rng = np.random.default_rng(SEED)

# Per-game / percentage stats that get a simulated quartile band per player.
columns_loop = [
    'hits_gp',
    'even_g_gp',
    'sh_gp',
    'blocks_gp',
    'pp_a_gp',
    'pp_g_gp',
    'even_a_gp',
    'fow_gp',
    'gw_g_gp',
    'fow_perc',
    'sh_perc',
    'pim_gp',
]


def simulate_quartiles(values, n_draws, generator):
    """Return the 25th and 75th percentile of normal draws fitted to `values`.

    Parameters
    ----------
    values : pandas.Series
        Observed values of one stat; must contain at least two entries.
    n_draws : int
        Number of samples drawn from the fitted normal distribution.
    generator : numpy.random.Generator
        Source of randomness (seeded by the caller for reproducibility).

    Returns
    -------
    tuple of float
        (lower quartile, upper quartile) of the simulated draws.
    """
    # ddof=1 (sample standard deviation) is spelled out rather than left to
    # whatever default the calling convention happens to pick.
    draws = generator.normal(values.mean(), values.std(ddof=1), n_draws)
    lower, upper = np.percentile(draws, [25, 75])
    return float(lower), float(upper)


# Collect one record per (player, stat) and build the frame once at the end;
# DataFrame.append was removed in pandas 2.0 and copies the frame on each call.
records = []
for player in player_list:
    player_df = nhl_cleaned.loc[nhl_cleaned['Player'] == player]
    for column in columns_loop:
        values = player_df[column]
        # Zeros are excluded from the fit (they include the zero-filled
        # missing values); non-finite values would poison mean/std.
        values = values[(values != 0) & np.isfinite(values)]
        if len(values) < 2:
            # Fewer than two observations: no spread can be estimated.
            lower_iqr, upper_iqr = 0, 0
        else:
            lower_iqr, upper_iqr = simulate_quartiles(values, num_simulations, rng)
        records.append({
            'Player': player,
            'stat': column,
            'lower_iqr': lower_iqr,
            'higher_iqr': upper_iqr,
        })

new_df = pd.DataFrame(
    records,
    columns=['Player', 'stat', 'lower_iqr', 'higher_iqr'],
)
       
        
        


       
        
        

