In [43]:
#Importing the necessary modules with their accepted aliases
import pandas as pd
import numpy as np

In [44]:
def distanceFromOrigin(x,y): 
    '''
    Determines the straight-line distance from the hoop (origin) to the shot.
    Will be useful in order to categorize the shot zones.
    
    Keyword arguments:
    x: x-coordinate of the shot (a number, a list/array of numbers or a pandas Series)
    y: y-coordinate of the shot (same kind and length as x)

    Returns:
    The Euclidean distance sqrt(x**2 + y**2), in the same unit as the coordinates.
    A single distance is returned for scalar inputs and one distance per element otherwise.
    '''
    
    #np.hypot computes sqrt(x**2 + y**2) without squaring the inputs itself, so very large
    #coordinates do not overflow, and it also accepts plain Python lists
    return np.hypot(x, y)

In [49]:
#Load the shot log from the csv file into a pandas dataframe and preview its first five rows
shotData = pd.read_csv(filepath_or_buffer='shots_data.csv')
shotData.head(n=5)

Unnamed: 0,team,x,y,fgmade
0,Team A,-5.0,26.4,0
1,Team A,-0.8,1.2,1
2,Team A,-13.9,9.5,1
3,Team A,-5.4,26.2,0
4,Team A,-4.9,14.5,1


In [50]:
#To make the code reproducible, add a column named distance that holds the distance from each shot to the hoop
shotData['distance'] = distanceFromOrigin(
    x=shotData['x'],
    y=shotData['y'],
)
shotData

Unnamed: 0,team,x,y,fgmade,distance
0,Team A,-5.0,26.4,0,26.869313
1,Team A,-0.8,1.2,1,1.442221
2,Team A,-13.9,9.5,1,16.836270
3,Team A,-5.4,26.2,0,26.750701
4,Team A,-4.9,14.5,1,15.305555
...,...,...,...,...,...
499,Team B,-0.5,2.2,0,2.256103
500,Team B,3.5,7.6,0,8.367198
501,Team B,0.2,8.8,0,8.802272
502,Team B,-0.6,0.7,1,0.921954


In [51]:
#Add a new column to the shotData Data Frame which establishes the shot zone of every attempt.
#The rules are checked in order and the first one that matches wins:
#  Non Corner Three: the y-coordinate is greater than 7.8 feet and the distance from the hoop is at least 23.75 feet
#  Corner Three:     the y-coordinate is at most 7.8 feet and the shot is at least 22 feet to the left or to the right of the hoop
#  Two Point:        the x-coordinate is strictly between -22 and 22 feet and the distance from the hoop is less than 23.75 feet
#A shot that matches none of the rules (for example a row with a missing coordinate) is labelled 'Else'
CORNER_MAX_Y = 7.8
CORNER_MIN_ABS_X = 22.0
ARC_RADIUS = 23.75

#Compute the distance and the sideways offset once instead of once per rule
shotDistance = distanceFromOrigin(shotData['x'], shotData['y'])
sidewaysOffset = shotData['x'].abs()

zoneRules = [
    (shotData['y'] > CORNER_MAX_Y) & (shotDistance >= ARC_RADIUS),
    (shotData['y'] <= CORNER_MAX_Y) & (sidewaysOffset >= CORNER_MIN_ABS_X),
    (sidewaysOffset < CORNER_MIN_ABS_X) & (shotDistance < ARC_RADIUS),
]
zoneNames = ['Non Corner Three', 'Corner Three', 'Two Point']
shotData['type'] = np.select(zoneRules, zoneNames, default='Else')

print(shotData)

#Now, we must calculate the shot distribution for each zone and team
#To perform this calculation, a groupby statement will allow the dataframe to group its data based off team and type
#We will divide the number of shots of each type per team by the team's total number of shots,
#multiplying the result by 100 to convert to a %
#size() counts rows, so the numerator and the denominator always cover exactly the same shots
shotsPerZone = shotData.groupby(['team','type']).size()
shotsPerTeam = shotData.groupby('team').size()
df = 100 * shotsPerZone / shotsPerTeam

print(df)

       team     x     y  fgmade   distance              type
0    Team A  -5.0  26.4       0  26.869313  Non Corner Three
1    Team A  -0.8   1.2       1   1.442221         Two Point
2    Team A -13.9   9.5       1  16.836270         Two Point
3    Team A  -5.4  26.2       0  26.750701  Non Corner Three
4    Team A  -4.9  14.5       1  15.305555         Two Point
..      ...   ...   ...     ...        ...               ...
499  Team B  -0.5   2.2       0   2.256103         Two Point
500  Team B   3.5   7.6       0   8.367198         Two Point
501  Team B   0.2   8.8       0   8.802272         Two Point
502  Team B  -0.6   0.7       1   0.921954         Two Point
503  Team B  -0.3   3.6       0   3.612478         Two Point

[504 rows x 6 columns]
team    type            
Team A  Corner Three         6.428571
        Non Corner Three    24.285714
        Two Point           69.285714
Team B  Corner Three         5.357143
        Non Corner Three    27.678571
        Two Point           66.964286

In [52]:
def eFG(FGM, _3PM, FGA):
    '''
    Calculates the effective field goal percentage (eFG%) from makes and attempts.
    In this notebook it is applied per shot zone and team rather than to a team's total shots.
    
    Keyword Arguments:
    FGM: The number of field goals made (fgmade == 1) in the shot zone being evaluated
    _3PM: The number of three pointers made in that same zone. For the two-point zone this is 0,
    and for a three-point zone it only counts the threes made in that zone (not corner and
    non-corner threes combined)
    FGA: The number of shots attempted in the shot zone being evaluated

    Returns:
    100 * (FGM + 0.5 * _3PM) / FGA
    '''
    
    #Every made three counts as one and a half made field goals
    threePointBonus = 0.5 * _3PM
    weightedMakes = FGM + threePointBonus
    return 100 * (weightedMakes / FGA)


In [53]:
#Group the made/missed flag by shot zone and team once and reuse the grouping for both totals
shotsByZoneAndTeam = shotData.groupby(['type','team'])['fgmade']
#This allows us to achieve the number of made field goals per shot zone and team
madeShots = shotsByZoneAndTeam.sum()
#This allows us to achieve the number of field goal attempts per shot zone and team
attemptedShots = shotsByZoneAndTeam.count()

#The Two Point category is a special case, since the number of three points made in this category is 0
#For every other zone, each made shot is counted as a made three pointer
isTwoPointZone = madeShots.index.get_level_values('type') == 'Two Point'
threesMade = madeShots.where(~isTwoPointZone, 0)

#Now that we have these arguments, we can apply them to the eFG function to determine the eFG% for each zone
TeamList = eFG(madeShots, threesMade, attemptedShots)
print(TeamList)

type              team  
Corner Three      Team A    75.000000
                  Team B    50.000000
Non Corner Three  Team A    46.323529
                  Team B    50.806452
Two Point         Team A    48.969072
                  Team B    44.666667
Name: fgmade, dtype: float64


# Displaying Results

In [41]:
#In order to display the results in an elegant, readable manner, we convert the series to a data frame
#to_frame builds the single-column frame and gives the column a more meaningful name in one step,
#without modifying a frame in place
eFGAns = TeamList.to_frame(name='eFG%')
eFGAns

Unnamed: 0_level_0,Unnamed: 1_level_0,eFG%
type,team,Unnamed: 2_level_1
Corner Three,Team A,75.0
Corner Three,Team B,50.0
Non Corner Three,Team A,46.323529
Non Corner Three,Team B,50.806452
Two Point,Team A,48.969072
Two Point,Team B,44.666667


In [55]:
#Displaying the results of shot percentage per zone for each team in a readable fashion
#to_frame names the column explicitly, so the values are kept whatever name the series carries
final = df.to_frame(name='Shot Percentage (%)')
final

Unnamed: 0_level_0,Unnamed: 1_level_0,Shot Percentage (%)
team,type,Unnamed: 2_level_1
Team A,Corner Three,6.428571
Team A,Non Corner Three,24.285714
Team A,Two Point,69.285714
Team B,Corner Three,5.357143
Team B,Non Corner Three,27.678571
Team B,Two Point,66.964286
