<a href="https://colab.research.google.com/github/vineelbhatti/NBA_Advanced_Stat_Analysis/blob/main/NBA_Advanced_Stats_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Import necessary modules
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go

In [None]:
#Open datasets
#Season year substituted into every URL below
year = 2023
#Url with season totals
url = "https://www.basketball-reference.com/leagues/NBA_{}_totals.html".format(year)
#Url with per game stats
url_per_game = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html".format(year)
#Url with advanced stats
url_advanced = "https://www.basketball-reference.com/leagues/NBA_{}_advanced.html".format(year)
#Only the advanced stats page is downloaded here; the with-block closes the connection after parsing
with urlopen(url_advanced) as html:
    #Name the parser explicitly so the parse does not depend on which optional parsers are installed
    soup = BeautifulSoup(html, "html.parser")

In [None]:
#Find all column headers
#The first table row on the page is treated as the header row
header_row = soup.find_all('tr', limit=1)[0]

#Text of every header cell in that row
headers = [th.get_text() for th in header_row.find_all('th')]

#Skip the first header so the column count matches rows built from 'td' cells only
#(presumably the leading column uses 'th' cells in the data rows - verify against the page)
headers = headers[1:]

In [None]:
#Extract the table contents as a list of rows
#Skip the first 'tr' (used as the header row) and keep the text of every 'td' cell in each remaining row
rows = soup.find_all('tr')[1:]
player_stats = [[td.get_text() for td in row.find_all('td')] for row in rows]

In [None]:
#Create dataframe
stats = pd.DataFrame(player_stats, columns=headers)
#Discard rows that have no player name; .copy() gives an independent frame so
#later column assignments do not operate on a slice of the unfiltered data
stats = stats[stats['Player'].notna()].copy()

In [None]:
### Reference explaining the scraping code above: https://medium.com/@osanchez2323/web-scraping-nba-stats-4b4f8c525994 ###

In [None]:
#Convert stats to number formats
#Minutes played is converted first because the filter below compares it numerically;
#assign() returns a new frame instead of writing into a possibly filtered slice
stats = stats.assign(MP=stats['MP'].astype(int))
#Removing all players with less than 200 minutes played; copy so the
#column assignments below act on an independent frame
stats = stats[stats['MP'] >= 200].copy()
#Stats converted to float: Win Shares, VORP, BPM, Usage Percentage,
#Assist Percentage, PER and True Shooting Percentage
float_columns = ['WS', 'VORP', 'BPM', 'USG%', 'AST%', 'PER', 'TS%']
for column in float_columns:
    stats[column] = stats[column].astype(float)

In [None]:
#Remove unwanted entries: rows listed under the 'PF-SF' or 'SF-SG' positions
#and rows whose team is 'TOT'
unwanted = stats['Pos'].isin(['PF-SF', 'SF-SG']) | (stats['Tm'] == 'TOT')
#Start from the converted stats table; filtered_stats did not exist before this point
filtered_stats = stats[~unwanted]

#Create a dataset with just player names; taken after filtering so each
#label lines up with the row that is actually plotted
labels = filtered_stats['Player']

In [None]:
#Color lookup tables: one keyed by position, one keyed by team abbreviation
#(only the team table is used by the scatter plot in this cell)
position_colors = {
    'PG': 'red',
    'SG': 'green',
    'SF': 'blue',
    'PF': 'orange',
    'C': 'purple',
}
nba_team_colors = {
    'ATL': '#E03A3E',
    'BOS': '#007A33',
    'BRK': '#000000',
    'CHO': '#1D1160',
    'CHI': '#CE1141',
    'CLE': '#6F263D',
    'DAL': '#00538C',
    'DEN': '#0E2240',
    'DET': '#C8102E',
    'GSW': '#1D428A',
    'HOU': '#CE1141',
    'IND': '#002D62',
    'LAC': '#C8102E',
    'LAL': '#552583',
    'MEM': '#5D76A9',
    'MIA': '#98002E',
    'MIL': '#00471B',
    'MIN': '#0C2340',
    'NOP': '#0C2340',
    'NYK': '#F58426',
    'OKC': '#007AC1',
    'ORL': '#0077C0',
    'PHI': '#ED174C',
    'PHO': '#1D1160',
    'POR': '#E03A3E',
    'SAC': '#5A2D81',
    'SAS': '#C4CED4',
    'TOR': '#CE1141',
    'UTA': '#002B5C',
    'WAS': '#E31837',
}

#Scatter of VORP against Win Shares, one marker per row of filtered_stats
trace = go.Scatter(
    x=filtered_stats['WS'],
    y=filtered_stats['VORP'],
    mode='markers',
    #Player names appear in the hover text via %{text}
    text=labels,
    hovertemplate='Point %{text}: (%{x:.2f}, %{y:.2f})<extra></extra>',
    marker={
        #Each marker takes the color of the row's team
        'color': [nba_team_colors[team] for team in filtered_stats['Tm']],
        'size': 10,
        'opacity': 0.7,
        'symbol': 'circle',
    },
)

In [None]:
# Figure layout: plot title plus a label for each axis
layout = go.Layout(
    title='VORP vs Win Shares',
    xaxis={'title': 'Win Shares'},
    yaxis={'title': 'VORP'},
)

# Build the figure from the scatter trace and the layout
fig = go.Figure(data=[trace], layout=layout)

# Render the figure
fig.show()

In [None]:
#Position-to-color lookup (not used by the scatter plot in this cell)
position_colors = {
    'PG': 'red',
    'SG': 'green',
    'SF': 'blue',
    'PF': 'orange',
    'C': 'purple',
}

#Scatter of BPM against Win Shares, one marker per row of filtered_stats
trace = go.Scatter(
    x=filtered_stats['WS'],
    y=filtered_stats['BPM'],
    mode='markers',
    #Player names appear in the hover text via %{text}
    text=labels,
    hovertemplate='Point %{text}: (%{x:.2f}, %{y:.2f})<extra></extra>',
    marker={
        #Each marker takes the color of the row's team
        'color': [nba_team_colors[team] for team in filtered_stats['Tm']],
        'size': 10,
        'opacity': 0.7,
        'symbol': 'circle',
    },
)

In [None]:
# Figure layout: plot title plus a label for each axis
layout = go.Layout(
    title='BPM vs Win Shares',
    xaxis={'title': 'Win Shares'},
    yaxis={'title': 'BPM'},
)

# Build the figure from the scatter trace and the layout
fig = go.Figure(data=[trace], layout=layout)

# Render the figure
fig.show()

In [None]:
#Position-to-color lookup (not used by the scatter plot in this cell)
position_colors = {
    'PG': 'red',
    'SG': 'green',
    'SF': 'blue',
    'PF': 'orange',
    'C': 'purple',
}

#Scatter of PER against Win Shares, one marker per row of filtered_stats
trace = go.Scatter(
    x=filtered_stats['WS'],
    y=filtered_stats['PER'],
    mode='markers',
    #Player names appear in the hover text via %{text}
    text=labels,
    hovertemplate='Point %{text}: (%{x:.2f}, %{y:.2f})<extra></extra>',
    marker={
        #Each marker takes the color of the row's team
        'color': [nba_team_colors[team] for team in filtered_stats['Tm']],
        'size': 10,
        'opacity': 0.7,
        'symbol': 'circle',
    },
)

In [None]:
# Figure layout: plot title plus a label for each axis
layout = go.Layout(
    title='PER vs Win Shares',
    xaxis={'title': 'Win Shares'},
    yaxis={'title': 'PER'},
)

# Build the figure from the scatter trace and the layout
fig = go.Figure(data=[trace], layout=layout)

# Render the figure
fig.show()