In [1]:
import json

import pandas as pd
import requests
import matplotlib.pyplot as plt
%matplotlib inline

# Get advanced (ORPM/DRPM) stats from ESPN

In [2]:
# Base RPM page followed by its numbered pages 1 through 11.
url_base = 'http://www.espn.com/nba/statistics/rpm/_/'
urls = [url_base]
urls.extend(url_base + 'page/' + str(page_number) for page_number in range(1, 12))

In [None]:
# Download the ESPN RPM table for each season (pages 1-11) and store one
# combined DataFrame per season in `espn_dfs`, keyed by the year string.
years = ['2023', '2022', '2021', '2020']
espn_dfs = {}
for year in years:
    url_base = 'http://www.espn.com/nba/statistics/rpm/_/year/' + str(year) + '/'
    urls = [url_base + 'page/' + str(i) for i in range(1, 12)]
    dfs = []
    for url in urls:
        raw = pd.read_html(url)[0]
        # The first parsed row carries the column labels; use it (lower-cased)
        # as the header.
        raw.columns = raw.iloc[0].str.lower()
        # Drop that label row and the first column. The explicit copy makes the
        # column assignments below operate on an independent frame instead of a
        # slice of `raw` (avoids SettingWithCopyWarning).
        df = raw.iloc[1:, 1:].copy()
        # The name cell is split on ',': the text before the comma stays as the
        # name, the text after it (minus its first character) is the position.
        name_parts = df['name'].str.split(',')
        df['position'] = name_parts.str[1].str[1:]
        df['name'] = name_parts.str[0]
        dfs.append(df)
    espn_df = pd.concat(dfs)
    espn_dfs[year] = espn_df

In [318]:
# Count of distinct player names in the 2023 ESPN table
espn_dfs['2023']['name'].nunique()

440

# Get stats for minutes played by each player against all other players

In [6]:
# Fetch the NBA team list from pbpstats. A timeout keeps the notebook from
# hanging forever on a stalled connection.
teams_response = requests.get("https://api.pbpstats.com/get-teams/nba", timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error body as JSON.
teams_response.raise_for_status()
teams = teams_response.json()
# Display label ('text') of every team entry in the response.
team_names = [team['text'] for team in teams['teams']]

In [None]:
# Endpoint queried once per (team, season) by the roster loop in this cell.
players_url = "https://api.pbpstats.com/get-team-players-for-season"
# Default season; the roster loop in this cell rebinds `year` as it iterates.
year = "2023"
# Accumulator for the roster records collected in this cell.
players_teams = list()
def translate_year(year):
    """Convert a season's ending year into a 'YYYY-YY' season label.

    Args:
        year: ending year of the season, as an int or a numeric string
            (e.g. '2023').

    Returns:
        The label as a string, e.g. '2022-23'. The two-digit suffix is
        zero-padded, so '2001' gives '2000-01' rather than '2000-1'.

    Raises:
        ValueError: if `year` cannot be converted with ``int``.
    """
    end_year = int(year)
    return str(end_year - 1) + '-' + str(end_year % 100).zfill(2)

# Collect the roster of every team for each season into `players_teams`,
# one record per player entry returned by the API.
roster_team_ids = [entry['id'] for entry in teams['teams']]  # invariant across seasons
for year in ['2021', '2022', '2023']:
    season_label = translate_year(year)
    for team in roster_team_ids:
        players_params = {
            "Season": season_label,
            "SeasonType": "Regular Season",
            "TeamId": team
        }
        players_response = requests.get(players_url, params=players_params, timeout=30)
        # Stop on an HTTP error rather than failing later on a missing 'players' key.
        players_response.raise_for_status()
        # Mapping whose keys are stored as 'player' and whose values are stored as 'name'.
        players = players_response.json()['players']
        players_teams.extend(
            {'player': player_id, 'name': player_name, 'team': team, "year": year}
            for player_id, player_name in players.items()
        )

In [64]:
# Tabular view of the collected roster records; show the last five rows.
players_teams_df = pd.DataFrame(data=players_teams)
players_teams_df.tail(5)

Unnamed: 0,player,name,team,year
1896,1630264,Anthony Gill,1610612764,2023
1897,1630557,Corey Kispert,1610612764,2023
1898,1630648,Jordan Schakel,1610612764,2023
1899,1630692,Jordan Goodwin,1610612764,2023
1900,1631098,Johnny Davis,1610612764,2023


In [7]:
def get_team_name(team_name):
    """Translate a team abbreviation, passing unknown ones through unchanged.

    Only three abbreviations are remapped: 'BKN' -> 'BRK', 'CHA' -> 'CHO'
    and 'PHX' -> 'PHO'. Any other value is returned as given.
    """
    remapped = {
        'BKN': 'BRK',
        'CHA': 'CHO',
        'PHX': 'PHO',
    }
    return remapped.get(team_name, team_name)

In [302]:
def get_team(player, year, by_name=False):
    """Look up the team recorded for a player in a given year.

    Args:
        player: value matched against the 'player' column of
            `players_teams_df`, or against the 'name' column when `by_name`
            is truthy.
        year: value matched against the 'year' column.
        by_name: select matching by name instead of by player id.

    Returns:
        The 'team' value of the first matching row. When several rows match,
        the remaining ones are ignored.

    Raises:
        IndexError: if no row matches.
    """
    key_column = 'name' if by_name else 'player'
    is_match = (players_teams_df[key_column] == player) & (players_teams_df.year == year)
    first_row = players_teams_df[is_match].iloc[0]
    return first_row['team']

def get_team_by_id(team_id):
    """Return the 'text' label of the first team whose 'id' equals `team_id`.

    Returns None when no entry in ``teams['teams']`` has that id.
    """
    for entry in teams['teams']:
        if entry['id'] == team_id:
            return entry['text']
    return None

def get_opp_player_minutes(player, vs_team, season, offdef='Offense'):
    """Query the pbpstats possessions endpoint for one team against one opposing player.

    Args:
        player: player id, sent as ``OpponentExactly1OnFloor``; the player's
            own team (looked up with ``get_team``) is sent as ``Opponent``.
        vs_team: team id sent as ``TeamId``.
        season: ending year of the season such as '2023'. It is converted
            with ``translate_year`` (e.g. to '2022-23') before being sent.
        offdef: 'Offense' or 'Defense', sent as ``OffDef``.

    Returns:
        A DataFrame built from the ``player_results`` field of the JSON response.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        IndexError: from ``get_team`` when the player has no row for `season`.
    """
    url = "https://api.pbpstats.com/get-possessions/nba"
    player_team = get_team(player, season)
    params = {
        "TeamId": vs_team,
        "Season": translate_year(season),
        "SeasonType": "Regular Season",
        "OffDef": offdef,  # Offense or Defense
        "StartType": "All",
        "Opponent": player_team,
        "OpponentExactly1OnFloor": player
    }
    # The timeout prevents a stalled connection from blocking the whole run.
    response = requests.get(url, params=params, timeout=30)
    # Surface HTTP failures as exceptions rather than as a missing-key error below.
    response.raise_for_status()
    response_json = response.json()
    return pd.DataFrame(response_json["player_results"])

In [272]:
def name_search_player(name):
    """Return the 'player' id of the first roster row matching `name`.

    An exact match on the 'name' column is preferred, so searching for
    'Gary Payton' cannot return 'Gary Payton II'. If there is no exact match,
    the first row whose name contains `name` as a literal substring is used.
    The text is never interpreted as a regular expression, so names such as
    'P.J. Tucker' are matched character for character.

    Raises:
        IndexError: if no row matches.
    """
    names = players_teams_df['name']
    exact_rows = players_teams_df[names == name]
    if len(exact_rows) > 0:
        return exact_rows.iloc[0]['player']
    # regex=False: treat '.', '(' etc. literally; na=False: missing names never match.
    partial_rows = players_teams_df[names.str.contains(name, regex=False, na=False)]
    return partial_rows.iloc[0]['player']

def name_search_team(team):
    """Return the 'id' of the first entry in ``teams['teams']`` whose 'text' equals `team`.

    Raises:
        IndexError: if no entry has that label.
    """
    matching_ids = [entry['id'] for entry in teams['teams'] if entry['text'] == team]
    return matching_ids[0]

def opp_drpm(minutes_df):
    """Sum seconds played and seconds-weighted DRPM over the rows of `minutes_df`.

    Missing `SecondsPlayed` values count as 0; the `drpm` column is cast to float.

    Returns:
        dict with 'weighted_drpm' (sum of seconds * drpm) and 'seconds'
        (sum of seconds).
    """
    seconds = minutes_df.SecondsPlayed.fillna(0).values
    ratings = minutes_df.drpm.astype('float').values
    weighted_total = (seconds * ratings).sum()
    return {'weighted_drpm': weighted_total, 'seconds': seconds.sum()}

def opp_orpm(minutes_df):
    """Sum seconds played and seconds-weighted ORPM over the rows of `minutes_df`.

    Missing `SecondsPlayed` values count as 0; the `orpm` column is cast to float.

    Returns:
        dict with 'weighted_orpm' (sum of seconds * orpm) and 'seconds'
        (sum of seconds).
    """
    seconds = minutes_df.SecondsPlayed.fillna(0).values
    ratings = minutes_df.orpm.astype('float').values
    weighted_total = (seconds * ratings).sum()
    return {'weighted_orpm': weighted_total, 'seconds': seconds.sum()}

In [None]:
# Iterate through all players and all teams
import time  # short pause before retrying a failed request

team_ids = players_teams_df.team.unique()
year = '2023'
if 'player_opp_drpms' not in globals():
    # Keep results from an earlier (possibly interrupted) run so this cell can resume.
    player_opp_drpms = []
# Names that already have a result. The roster frame can list the same name
# more than once for a year, so this also prevents duplicate work.
processed_players = {entry['player'] for entry in player_opp_drpms}
season_espn_df = espn_dfs[year]
for player in players_teams_df[players_teams_df.year == year].name:
    if player in processed_players:
        continue
    player_id = name_search_player(player)
    own_team = get_team(player_id, year)  # loop-invariant: looked up once per player
    player_minutes_dfs = []
    drpms = []
    for team in team_ids:
        if team == own_team:
            continue
        try:
            df = get_opp_player_minutes(player_id, team, year)
        except Exception:
            # Single retry after a brief pause; a second failure propagates.
            # `Exception` (not a bare except) lets Ctrl-C interrupt the loop.
            time.sleep(0.2)
            df = get_opp_player_minutes(player_id, team, year)
        if len(df) == 0:
            continue
        player_minutes_dfs.append(df)
        minutes_df_merged = df.merge(season_espn_df, left_on='Name', right_on='name')
        drpms.append(opp_drpm(minutes_df_merged))
    if len(drpms) > 0:
        # Seconds-weighted mean DRPM across all opposing teams.
        weighted_avg = sum(d['weighted_drpm'] for d in drpms) / sum(d['seconds'] for d in drpms)
    else:
        weighted_avg = None
    player_opp_drpms.append({
        'player': player,
        'player_id': player_id,
        'player_minutes_dfs': player_minutes_dfs,
        'avg_opp_drpm': weighted_avg
    })
    processed_players.add(player)
    print(player_opp_drpms[-1]['player'], player_opp_drpms[-1]['avg_opp_drpm'])

In [None]:
# Collapse each player's list of per-team DataFrames into a single DataFrame
# joined with the 2023 ESPN table.
for player in player_opp_drpms:
    per_team_dfs = player['player_minutes_dfs']
    # Only a non-empty list still needs combining. Testing the type, rather than
    # a length threshold, keeps this cell safe to re-run: an entry that was
    # already replaced by a merged DataFrame is skipped whatever its row count.
    if isinstance(per_team_dfs, list) and len(per_team_dfs) > 0:
        player['player_minutes_dfs'] = pd.concat(per_team_dfs).merge(
            espn_dfs['2023'], left_on='Name', right_on='name')

In [273]:
# Attach the seconds-weighted average opposing ORPM and DRPM to every player entry.
for entry in player_opp_drpms:
    minutes = entry['player_minutes_dfs']
    if len(minutes) == 0:
        # Nothing recorded for this player: only the ORPM average is reset here.
        entry['avg_opp_orpm'] = None
        continue
    offense_totals = opp_orpm(minutes)
    entry['avg_opp_orpm'] = offense_totals['weighted_orpm'] / offense_totals['seconds']
    defense_totals = opp_drpm(minutes)
    entry['avg_opp_drpm'] = defense_totals['weighted_drpm'] / defense_totals['seconds']


In [309]:
# One summary row per player; rows without an ORPM average are dropped.
_summary_columns = {
    field: [entry[field] for entry in player_opp_drpms]
    for field in ('player', 'avg_opp_orpm', 'avg_opp_drpm')
}
# Team label resolved from the player's name for 2023.
_summary_columns['team'] = [
    get_team_by_id(get_team(entry['player'], '2023', by_name=True))
    for entry in player_opp_drpms
]
orpm_drpm_df = pd.DataFrame(_summary_columns).dropna(subset=['avg_opp_orpm'])

# Analysis

In [311]:
# Ten players with the highest opposing DRPM (shown in ascending order)
orpm_drpm_df.sort_values(by='avg_opp_drpm').tail(10)

Unnamed: 0,player,avg_opp_orpm,avg_opp_drpm,team
208,Marcus Morris Sr.,0.191363,0.846881,LAC
465,O.G. Anunoby,0.053317,0.85382,TOR
69,Nikola Vucevic,0.136335,0.870881,CHI
410,Jusuf Nurkic,0.121002,0.89098,POR
37,Marcus Smart,0.065907,0.900937,BOS
372,P.J. Tucker,0.18304,0.912638,PHI
196,Tyrese Haliburton,0.034268,0.957469,IND
190,Myles Turner,0.052436,0.959811,IND
34,Al Horford,0.082431,0.96793,BOS
52,Mason Plumlee,0.056304,1.126215,CHA


In [323]:
# Ten players with the highest opposing ORPM (shown in ascending order)
orpm_drpm_df.sort_values(by='avg_opp_orpm').tail(10)

Unnamed: 0,player,avg_opp_orpm,avg_opp_drpm,team
429,Chima Moneke,0.261102,-0.982678,SAC
357,Markelle Fultz,0.265797,0.840594,ORL
222,Russell Westbrook,0.304769,0.202851,LAL
224,Anthony Davis,0.324831,0.599213,LAL
221,LeBron James,0.327397,0.722197,LAL
277,Khris Middleton,0.359057,-0.049136,MIL
256,Udonis Haslem,0.359508,0.312957,MIA
204,Kendall Brown,0.412247,-1.088522,IND
239,Cole Swider,0.425193,-0.138714,LAL
228,Sterling Brown,0.567772,-0.411564,LAL


In [321]:
# Pelicans players, ordered by average opposing DRPM
# Bench players face worse opposing players, sensibly
orpm_drpm_df.loc[orpm_drpm_df['team'] == 'NOP'].sort_values(by='avg_opp_drpm')

Unnamed: 0,player,avg_opp_orpm,avg_opp_drpm,team
321,Dereon Seabron,-0.55976,-1.033774,NOP
305,Garrett Temple,-0.133906,-0.649165,NOP
314,Kira Lewis Jr.,0.136892,-0.614779,NOP
311,Devonte' Graham,0.064605,-0.318731,NOP
308,Willy Hernangomez,0.039004,-0.280454,NOP
309,Larry Nance Jr.,0.074414,-0.187078,NOP
313,Jaxson Hayes,0.087461,-0.071138,NOP
319,Jose Alvarado,0.027814,0.051028,NOP
320,Dyson Daniels,-0.026917,0.111501,NOP
315,Naji Marshall,0.073726,0.202381,NOP


In [320]:
# Celtics players, ordered by average opposing DRPM
orpm_drpm_df.loc[orpm_drpm_df['team'] == 'BOS'].sort_values(by='avg_opp_drpm')

Unnamed: 0,player,avg_opp_orpm,avg_opp_drpm,team
50,JD Davison,-0.668367,-1.006804,BOS
42,Justin Jackson,-0.756831,-0.963707,BOS
46,Mfiondu Kabengele,-0.52545,-0.906268,BOS
49,Sam Hauser,-0.109908,-0.456126,BOS
38,Noah Vonleh,-0.198012,-0.243768,BOS
44,Luke Kornet,-0.17714,-0.216954,BOS
48,Payton Pritchard,-0.273307,-0.003776,BOS
40,Malcolm Brogdon,0.092916,0.011672,BOS
45,Robert Williams III,0.111416,0.10276,BOS
47,Grant Williams,0.029776,0.34947,BOS


In [None]:
orpm