In [1]:
import pandas as pd

In [2]:
def get_shot_avg(season, data_dir='data/pbp/raw'):
    ''' Build a table of the mean shot distance per home team for one season.

        season:   last 2 numbers of the year the playoffs are in for the season of
                  interest, IE the 2020-2021 season would be 21.
        data_dir: folder holding the raw play-by-play files, which are expected to be
                  named pbp_query_20XX20YY.csv. Defaults to the original location.

        Returns a dataframe with columns team and distance_mean, sorted by
        distance_mean ascending. Row labels are the order in which each team first
        shows up as home_team in the data.

        Each average covers every SHOT/MISS/GOAL row whose home_team is that team;
        there is no filtering on which side took the shot.
    '''
    # NOTE(review): 89 looks like the goal-line x coordinate of the rink -- confirm units.
    goal_line_x = 89

    sea_1 = str(season - 1).zfill(2)
    sea_2 = str(season).zfill(2)
    pbp = pd.read_csv(f'{data_dir}/pbp_query_20{sea_1}20{sea_2}.csv', low_memory=False)

    # Only shot-like events take part in the distance average.
    shots = pbp[pbp.event_type.isin(['SHOT', 'MISS', 'GOAL'])].copy()

    # abs() folds both ends of the ice together so the distance is measured
    # from (goal_line_x, 0) whichever end the event was recorded at.
    shots['event_distance'] = (
        (goal_line_x - shots['coords_x'].abs())**2 + shots['coords_y']**2
    )**(1/2)

    # sort=False keeps teams in first-appearance order, so reset_index() numbers
    # them the same way a loop over home_team.unique() would.
    shot_avg = (
        shots.groupby('home_team', sort=False)['event_distance']
        .mean()
        .round(2)
        .rename('distance_mean')
        .rename_axis('team')
        .reset_index()
    )

    return shot_avg.sort_values(by='distance_mean', ascending=True)

In [3]:
# Season code 21 selects the 2020-2021 season (last two digits of the playoff year).
get_shot_avg(21)

   team  distance_mean
26  CHI          31.50
18  NYI          31.70
20  ANA          31.97
23  CAR          33.04
3   EDM          33.21
2   TOR          33.22
7   NYR          33.23
13  VGK          33.23
30  S.J          33.54
11  WPG          33.67
21  VAN          33.69
1   T.B          33.74
28  DAL          33.85
16  CGY          34.15
12  ARI          34.22
14  L.A          34.26
9   DET          34.36
17  PIT          34.59
6   N.J          34.90
25  WSH          34.90
19  STL          35.13
24  CBJ          35.13
4   COL          35.31
10  NSH          35.36
29  MTL          35.60
15  OTT          35.71
8   FLA          35.82
5   BUF          35.82
22  BOS          35.89
0   PHI          36.50
27  MIN          36.85


In [4]:
def get_shot_avg_2(season, data_dir='data/pbp/raw'):
    ''' Build a table of the mean shot distance per team, split by home and away games.

        season:   last 2 numbers of the year the playoffs are in for the season of
                  interest, IE the 2020-2021 season would be 21.
        data_dir: folder holding the raw play-by-play files, which are expected to be
                  named pbp_query_20XX20YY.csv. Defaults to the original location.

        Returns a dataframe with columns team, distance_h and distance_a, sorted by
        distance_h ascending. Row labels are the order in which each team first
        shows up as home_team in the data.

        distance_h averages every SHOT/MISS/GOAL row whose home_team is the team and
        distance_a every such row whose away_team is the team; there is no filtering
        on which side took the shot. A team that never appears as away_team gets NaN
        in distance_a.
    '''
    # NOTE(review): 89 looks like the goal-line x coordinate of the rink -- confirm units.
    goal_line_x = 89

    sea_1 = str(season - 1).zfill(2)
    sea_2 = str(season).zfill(2)
    pbp = pd.read_csv(f'{data_dir}/pbp_query_20{sea_1}20{sea_2}.csv', low_memory=False)

    # Only shot-like events take part in the distance averages.
    shots = pbp[pbp.event_type.isin(['SHOT', 'MISS', 'GOAL'])].copy()

    # abs() folds both ends of the ice together so the distance is measured
    # from (goal_line_x, 0) whichever end the event was recorded at.
    shots['event_distance'] = (
        (goal_line_x - shots['coords_x'].abs())**2 + shots['coords_y']**2
    )**(1/2)

    # sort=False keeps teams in first-appearance order, so reset_index() numbers
    # them the same way a loop over home_team.unique() would.
    home_means = shots.groupby('home_team', sort=False)['event_distance'].mean().round(2)
    away_means = shots.groupby('away_team')['event_distance'].mean().round(2)

    shot_avg = home_means.rename('distance_h').rename_axis('team').reset_index()
    # Look up each listed team's away average; teams with no away rows map to NaN.
    shot_avg['distance_a'] = shot_avg['team'].map(away_means)

    return shot_avg.sort_values(by='distance_h', ascending=True)

In [6]:
df = get_shot_avg_2(21)
# The table also carries the string `team` column; numeric_only=True keeps the
# summary to the distance columns instead of raising TypeError on pandas >= 2.0.
print(df.std(numeric_only=True))
print(df.mean(numeric_only=True))

distance_h    1.349476
distance_a    0.712988
dtype: float64
distance_h    34.325484
distance_a    34.328710
dtype: float64


In [10]:
# Show the `df` table currently bound in the kernel (as far as this notebook shows,
# the one assigned from get_shot_avg_2(21)).
df

Unnamed: 0,team,distance_h,distance_a
26,CHI,31.5,33.64
18,NYI,31.7,35.33
20,ANA,31.97,35.12
23,CAR,33.04,33.72
3,EDM,33.21,34.19
2,TOR,33.22,32.34
7,NYR,33.23,35.25
13,VGK,33.23,34.03
30,S.J,33.54,34.12
11,WPG,33.67,34.27


In [7]:
def get_shot_avg_3(season, data_dir='data/pbp/raw'):
    ''' Build a table of the mean shot y coordinate per team, split by home and away games.

        season:   last 2 numbers of the year the playoffs are in for the season of
                  interest, IE the 2020-2021 season would be 21.
        data_dir: folder holding the raw play-by-play files, which are expected to be
                  named pbp_query_20XX20YY.csv. Defaults to the original location.

        Returns a dataframe with columns team, distance_h and distance_a, sorted by
        distance_h ascending. Row labels are the order in which each team first
        shows up as home_team in the data.

        NOTE: despite the column names, both value columns hold the mean of coords_y
        (signed), not a distance. The names are kept so existing callers keep working.

        distance_h averages every SHOT/MISS/GOAL row whose home_team is the team and
        distance_a every such row whose away_team is the team; there is no filtering
        on which side took the shot. A team that never appears as away_team gets NaN
        in distance_a.
    '''
    sea_1 = str(season - 1).zfill(2)
    sea_2 = str(season).zfill(2)
    pbp = pd.read_csv(f'{data_dir}/pbp_query_20{sea_1}20{sea_2}.csv', low_memory=False)

    # Only shot-like events take part in the averages. The shot distance is not
    # needed for this table, so it is no longer computed here.
    shots = pbp[pbp.event_type.isin(['SHOT', 'MISS', 'GOAL'])]

    # sort=False keeps teams in first-appearance order, so reset_index() numbers
    # them the same way a loop over home_team.unique() would.
    home_means = shots.groupby('home_team', sort=False)['coords_y'].mean().round(2)
    away_means = shots.groupby('away_team')['coords_y'].mean().round(2)

    shot_avg = home_means.rename('distance_h').rename_axis('team').reset_index()
    # Look up each listed team's away average; teams with no away rows map to NaN.
    shot_avg['distance_a'] = shot_avg['team'].map(away_means)

    return shot_avg.sort_values(by='distance_h', ascending=True)

In [8]:
# Home/away table for season code 21 built from the mean `coords_y` of each shot event;
# note the value columns are still labelled distance_h / distance_a.
df_y = get_shot_avg_3(21)
df_y

Unnamed: 0,team,distance_h,distance_a
21,VAN,-2.92,0.19
16,CGY,-1.86,-0.23
26,CHI,-1.79,0.56
27,MIN,-0.69,-0.01
11,WPG,-0.69,-0.61
28,DAL,-0.63,0.58
30,S.J,-0.38,-0.59
13,VGK,-0.36,0.5
12,ARI,-0.28,0.99
22,BOS,-0.18,0.11


In [9]:
# df_y also carries the string `team` column; numeric_only=True keeps the summary
# to the value columns instead of raising TypeError on pandas >= 2.0.
print(df_y.std(numeric_only=True))
print(df_y.mean(numeric_only=True))

distance_h    0.984418
distance_a    0.601347
dtype: float64
distance_h    0.047742
distance_a    0.042258
dtype: float64
