In [1]:
import pandas as pd
import numpy as np

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.set_option("mode.chained_assignment", None)

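The data was downloaded from [Kaggle](https://www.kaggle.com/nathanlauga/nba-games?select=games_details.csv). **Note:** the next cell loads a file named `Seasons_Stats.csv`, whereas this link selects `games_details.csv` — confirm that the link points to the dataset actually used.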

In [2]:
# Read the per-season player statistics CSV from the local Downloads folder.
seasons_csv_path = "~/Downloads/Seasons_Stats.csv"
player_stats = pd.read_csv(seasons_csv_path)

Since we are only interested in 3-point data for this analysis, we select only those columns from the dataframe.

In [3]:
# Columns needed for the 3-point analysis: who, when, accuracy and attempts.
three_point_columns = ["Player", "Year", "3P%", "3PA"]
three_point_data = player_stats[three_point_columns]

# Preview the first rows as plain text.
print(three_point_data.head().to_string())

            Player    Year  3P%  3PA
0  Curly Armstrong  1950.0  NaN  NaN
1     Cliff Barker  1950.0  NaN  NaN
2    Leo Barnhorst  1950.0  NaN  NaN
3       Ed Bartels  1950.0  NaN  NaN
4       Ed Bartels  1950.0  NaN  NaN


We then filter out players where 3-point data is not available (i.e. **3P%** column is set to `NaN`). This is possibly because there was no 3-point line at the time or simply because the player did not attempt any 3-pointers.

In [4]:
# Drop rows with no recorded 3P% (no 3-point line yet, or no 3-point attempts).
rows_with_three_pct = three_point_data.dropna(subset=["3P%"])

# Order by player, then season, then accuracy so each player's seasons are contiguous.
three_point_data = rows_with_three_pct.sort_values(["Player", "Year", "3P%"])

print(three_point_data.head().to_string())

          Player    Year    3P%   3PA
8035  A.C. Green  1986.0  0.167   6.0
8420  A.C. Green  1987.0  0.000   5.0
8807  A.C. Green  1988.0  0.000   2.0
9242  A.C. Green  1989.0  0.235  17.0
9688  A.C. Green  1990.0  0.283  46.0


We then group the 3-point data by player and season, since we want to understand how a given player's 3-point percentage changed from one season to the next.

In [5]:
# Collapse to one row per (Player, Year): attempts are summed and the
# percentage is averaged (unweighted) across that player's rows for the season.
# NOTE(review): a player can have several rows in one season; if one of them is
# already a combined total, the summed attempts would double-count — confirm
# against the source data.
season_groups = three_point_data.groupby(["Player", "Year"])
season_totals = season_groups.agg({"3PA": "sum", "3P%": "mean"})
three_point_data = season_totals.rename(
    columns={"3PA": "total_atp_3", "3P%": "avg_3_pct"}
)

print(three_point_data.head().to_string())

                   total_atp_3  avg_3_pct
Player     Year                          
A.C. Green 1986.0          6.0      0.167
           1987.0          5.0      0.000
           1988.0          2.0      0.000
           1989.0         17.0      0.235
           1990.0         46.0      0.283


We require more than 200 3-point attempts in a season so that small-sample outliers are excluded.

In [6]:
# Keep only player-seasons with strictly more than 200 three-point attempts.
has_enough_attempts = three_point_data["total_atp_3"] > 200
filtered_three_point_data = three_point_data[has_enough_attempts]

print(filtered_three_point_data.head().to_string())

                     total_atp_3  avg_3_pct
Player       Year                          
Aaron Brooks 2009.0        309.0   0.366000
             2010.0        525.0   0.398000
             2011.0        472.0   0.303000
             2013.0        268.0   0.345667
             2014.0        496.0   0.386000


We then compare each row's 3-point percentage to the percentage in the preceding row, which is normally the same player's previous qualifying season (not necessarily the immediately preceding year).
NOTE: This can likely be improved upon from an efficiency and readability standpoint.

In [7]:
# Change versus the preceding row, expressed in percentage points.
# The preceding row can belong to a different player at this stage.
season_pct = filtered_three_point_data["avg_3_pct"]
previous_row_pct = season_pct.shift(1)
pct_point_change = (season_pct - previous_row_pct) * 100

filtered_three_point_data["diff"] = pct_point_change.round(4)

# Move Player/Year out of the index and back into ordinary columns.
filtered_three_point_data = filtered_three_point_data.reset_index()

print(filtered_three_point_data.head().to_string())

         Player    Year  total_atp_3  avg_3_pct    diff
0  Aaron Brooks  2009.0        309.0   0.366000     NaN
1  Aaron Brooks  2010.0        525.0   0.398000  3.2000
2  Aaron Brooks  2011.0        472.0   0.303000 -9.5000
3  Aaron Brooks  2013.0        268.0   0.345667  4.2667
4  Aaron Brooks  2014.0        496.0   0.386000  4.0333


Since the data is sorted by player and year, the first row for each player is compared against the last row of a different player - in those cases, I set the **diff** column value to `NaN`.

In [8]:
# True on every row whose player differs from the row above it, i.e. the first
# row of each player, where "diff" was computed against another player's season.
mask = filtered_three_point_data["Player"] != filtered_three_point_data["Player"].shift(1)

# Assign through a single .loc call. The chained form df["diff"][mask] = ...
# writes to an intermediate object and may not update the frame at all
# (it is a silent no-op under pandas Copy-on-Write).
filtered_three_point_data.loc[mask, "diff"] = np.nan

print(filtered_three_point_data.head().to_string())

         Player    Year  total_atp_3  avg_3_pct    diff
0  Aaron Brooks  2009.0        309.0   0.366000     NaN
1  Aaron Brooks  2010.0        525.0   0.398000  3.2000
2  Aaron Brooks  2011.0        472.0   0.303000 -9.5000
3  Aaron Brooks  2013.0        268.0   0.345667  4.2667
4  Aaron Brooks  2014.0        496.0   0.386000  4.0333


Finally some results! First, I was curious to know who had the biggest drop from the 3-point line from one year to the next. Interestingly, Toni Kukoc had a big drop from 1999 to 2000 but one of the biggest jumps in history from 2000 to 2001.

In [9]:
# Group on every descriptive column, take the minimum "diff" per group, then
# sort so the largest year-over-year drops come first.
season_keys = ["Player", "Year", "total_atp_3", "avg_3_pct"]
lowest_diff_per_season = filtered_three_point_data.groupby(season_keys).min().reset_index()
min_player_change = lowest_diff_per_season.sort_values(by="diff", ascending=True)

print(min_player_change.head().to_string())

               Player    Year  total_atp_3  avg_3_pct     diff
2125        Tony Delk  2002.0        428.0   0.310667 -19.9667
2121       Toni Kukoc  2000.0        336.0   0.260667 -14.2333
2058      Steve Smith  2005.0        210.0   0.337333 -13.4667
1848  Reggie Williams  1994.0        230.0   0.278000 -13.1667
340     Chris Whitney  2003.0        328.0   0.278333 -12.7667


Here are the top 5 players with the largest one-season increase from the 3-point line, restricted to seasons in which the player shot at least 35% from three.

In [10]:
# Group on every descriptive column, take the maximum "diff" per group, then
# sort so the largest year-over-year increases come first.
max_player_change = (
    filtered_three_point_data.groupby(["Player", "Year", "total_atp_3", "avg_3_pct"])
    .max()
    .reset_index()
    .sort_values(by="diff", ascending=False)
)

# Keep seasons shot at 35% or better that actually have a computed change.
# The missing-value test is explicit: using the float "diff" column itself as
# a boolean mask relies on implicit float-to-bool coercion and hides the intent.
shot_at_least_35_pct = max_player_change["avg_3_pct"] >= 0.35
has_computed_diff = max_player_change["diff"].notna()
max_player_change = max_player_change[shot_at_least_35_pct & has_computed_diff].reset_index()

print(max_player_change.head().to_string())

   index            Player    Year  total_atp_3  avg_3_pct  diff
0    102  Anthony Tolliver  2014.0        247.0   0.413000  19.3
1   2122        Toni Kukoc  2001.0        314.0   0.445667  18.5
2   1081       Joe Johnson  2005.0        370.0   0.478000  17.3
3   1206      Kevin Durant  2009.0        230.0   0.422000  13.4
4   2057       Steve Smith  2002.0        246.0   0.472000  13.3
