We will try to scrape salary data for each player listed in the Historical_RaptorData.csv 


In [19]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm, trange
import time

In [2]:
histor_raptor_player = pd.read_csv("./raw/historical_RAPTOR_by_player.csv")

In [3]:
histor_raptor_player

Unnamed: 0,player_name,player_id,season,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
0,Alaa Abdelnaby,abdelal01,1991,640,303,-3.938450,-0.510076,-4.448526,-0.265191,-0.226163,-0.039028,-3.905732,-1.696796,-5.602528,0.199241
1,Alaa Abdelnaby,abdelal01,1992,1998,959,-2.553849,-0.197943,-2.751792,-0.006893,0.070322,-0.077215,-2.499576,-0.721876,-3.221452,0.485436
2,Alaa Abdelnaby,abdelal01,1993,2754,1379,-2.373736,-2.069808,-4.443544,-1.227189,-0.845272,-0.381918,-2.328144,-2.002263,-4.330407,0.488266
3,Alaa Abdelnaby,abdelal01,1994,320,159,-6.140056,-2.748312,-8.888368,-0.500600,-0.500600,0.000000,-5.820936,-3.526560,-9.347496,0.511152
4,Alaa Abdelnaby,abdelal01,1995,984,506,-3.846543,-1.268012,-5.114556,-0.615802,-0.615802,0.000000,-3.715429,-1.027024,-4.742453,1.545922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19154,Ivica Zubac,zubaciv01,2018,871,410,-2.903709,2.688832,-0.214877,0.531968,0.531968,0.000000,-2.616331,0.752875,-1.863456,0.311576
19155,Ivica Zubac,zubaciv01,2019,2345,1079,-2.362444,1.813768,-0.548676,1.217134,1.114995,0.102138,-2.810283,1.157824,-1.652459,0.077074
19156,Ivica Zubac,zubaciv01,2020,3447,1646,1.099849,3.549458,4.649308,6.224207,5.020903,1.203304,-0.310842,3.331974,3.021132,-0.410164
19157,Ivica Zubac,zubaciv01,2021,3908,1910,-0.909039,2.525735,1.616696,4.261327,3.704630,0.556697,-0.874338,2.280011,1.405673,-0.117623


### Example of how salary data is shown:
---
It is present as a table under the page for each player as shown [here](https://www.basketball-reference.com/players/j/jamesle01.html#all_salaries).
The URL for LeBron James is:
```
https://www.basketball-reference.com/players/j/jamesle01.html#all_salaries
```
<!-- <script src="https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2Fj%2Fjamesle01.html&div=div_all_salaries"></script> -->

However, we can't access the table directly since it's loaded in from a SQL backend and HTML is used to wrap the table but doesn't show the values directly. Instead, we have to use the url of the widget for the table:

```
https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2Fj%2Fjamesle01.html&div=div_all_salaries
```

We can replace `%2Fplayers%2Fj%2Fjamesle01` with `%2Fplayers%2Fa%2Fabdelal01` to get the url for NBA player Alaa Abdelnaby:

```
https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2Fa%2Fabdelal01.html&div=div_all_salaries
```

In other words, the link for each player's salary data is essentially deliminated as:

`https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2F` + `<letter_of_last_name>` + `%2F` + `<player_id>` + `.html&div=div_all_salaries`
    
The `player_id` is readily available as a column in the `histor_raptor_player`.

### Scrape 1 page:
---
https://medium.com/analytics-vidhya/intro-to-scraping-basketball-reference-data-8adcaa79664a

In [4]:
# from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests

In [5]:
url = "https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2Fj%2Fjamesle01.html&div=div_all_salaries"
# get HTML data
# html = urlopen(url)
page = requests.get(url)
# Extract the HTML text.
html = page.text

# get bs4 obj from HTML
soup = BeautifulSoup(html, features = "lxml")

If there is a 429 message (too many requests), this can be identified with `page.headers` and looking for the `Retry-After` in the headers. The value would be the delay time.

In [12]:
'Retry-After' in page.headers

False

In [11]:
dict(page.headers)

{'Date': 'Fri, 18 Nov 2022 17:33:12 GMT',
 'Content-Type': 'text/javascript',
 'Content-Length': '2021',
 'Connection': 'keep-alive',
 'SR-Header': 'memcached-fastcgi count:15',
 'SR-key': '/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2Fj%2Fjamesle01.html&div=div_all_salaries',
 'X-Frame-Options': 'SAMEORIGIN',
 'Strict-Transport-Security': 'max-age=2592000; includeSubDomains',
 'Vary': 'Accept-Encoding',
 'Content-Encoding': 'gzip',
 'X-Content-Type-Options': 'nosniff',
 'Referrer-Policy': 'no-referrer-when-downgrade',
 'Last-Modified': 'Fri, 18 Nov 2022 16:58:08 GMT',
 'CF-Cache-Status': 'EXPIRED',
 'Accept-Ranges': 'bytes',
 'Set-Cookie': '__cf_bm=ZMPNtrHG81itdGmTFauLwp11ogTF60VwIKPVWkCFnHY-1668792792-0-AbviANKf0Yz2eacOKmmi6Sg6seFe7rm3cgkbl64SVolko3ujU0knhS8+VvbQ54xQ4//byAZ6zoBOinxakW6IUKo=; path=/; expires=Fri, 18-Nov-22 18:03:12 GMT; domain=.sports-reference.com; HttpOnly; Secure; SameSite=None',
 'Server': 'cloudflare',
 'CF-RAY': '76c2732549ca03e8-ORD'}

Each row stores the season as a `th` while all other attributes can be accessed as `td`.

In [8]:
soup.find_all("tr", limit = 1)[0]

<tr><th aria-label="Season" class="poptip sort_default_asc center" data-stat="season" data-tip="If listed as single number, the year the season ended.&lt;br&gt;★ - Indicates All-Star for league.&lt;br&gt;Only on regular season tables." scope="col">Season</th><th aria-label="Team" class="poptip sort_default_asc left" data-stat="team_name" scope="col">Team</th><th aria-label="Lg" class="poptip sort_default_asc center" data-stat="lg_id" data-tip="League" scope="col">Lg</th><th aria-label="Salary" class="poptip right" data-stat="salary" scope="col">Salary</th></tr>

In [9]:
# get all headers of table as a list
headers = [th.getText() for th in soup.findAll('tr', limit=1)[0].findAll('th')]
print(headers)
# get all headers of table as a list
rows = soup.findAll('tr')[1:]
rows_data = [[td.getText() for td in rows[i].findAll('td')]
                    for i in range(len(rows))]

# change money to numeric
for row in rows_data:
    row[-1] = int(row[-1].strip('$').replace(',',''))
    
df = pd.DataFrame(rows_data)
df.columns = headers[1:]
df[headers[0]] = [[td.getText() for td in rows[i].findAll('th')][0] for i in range(len(rows))]
df = df[headers]
df

['Season', 'Team', 'Lg', 'Salary']


Unnamed: 0,Season,Team,Lg,Salary
0,2003-04,Cleveland Cavaliers,NBA,4018920
1,2004-05,Cleveland Cavaliers,NBA,4320360
2,2005-06,Cleveland Cavaliers,NBA,4621800
3,2006-07,Cleveland Cavaliers,NBA,5828090
4,2007-08,Cleveland Cavaliers,NBA,13041250
5,2008-09,Cleveland Cavaliers,NBA,14410581
6,2009-10,Cleveland Cavaliers,NBA,15779912
7,2010-11,Miami Heat,NBA,14500000
8,2011-12,Miami Heat,NBA,16022500
9,2012-13,Miami Heat,NBA,17545000


### Repeat with other test URLs to verify

In [13]:
url_path_beg = "https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2F"
url_path_mid = "%2F"
url_path_end = ".html&div=div_all_salaries"
np.random.seed(29)
test_id = np.random.choice(np.unique(histor_raptor_player["player_id"]))

test_url  = url_path_beg + test_id[0] + url_path_mid + test_id + url_path_end
test_url

'https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2Fw%2Fwillifr02.html&div=div_all_salaries'

In [14]:
test_id

'willifr02'

In [15]:
# url = test_url
# # get HTML data
# html = urlopen(url)

page = requests.get(url)
# Extract the HTML text.
html = page.text

# get bs4 obj from HTML
soup = BeautifulSoup(html, features = "lxml")

# get all headers of table as a list
headers = [th.getText() for th in soup.findAll('tr', limit=1)[0].findAll('th')]
print(headers)
# get all headers of table as a list
rows = soup.findAll('tr')[1:]
rows_data = [[td.getText() for td in rows[i].findAll('td')]
                    for i in range(len(rows))]

# change money to numeric
for row in rows_data:
    try:
        row[-1] = int(row[-1].strip('$').replace(',',''))
    except:
        print("\"{}\" couldn't be converted to numeric".format(row[-1]))
        print("replaced with 0")
        row[-1] = 0
df = pd.DataFrame(rows_data)
df.columns = headers[1:]
df[headers[0]] = [[td.getText() for td in rows[i].findAll('th')][0] for i in range(len(rows))]
df = df[headers]

df["player_id"] = [test_id]* df.shape[0]
df

['Season', 'Team', 'Lg', 'Salary']


Unnamed: 0,Season,Team,Lg,Salary,player_id
0,2003-04,Cleveland Cavaliers,NBA,4018920,willifr02
1,2004-05,Cleveland Cavaliers,NBA,4320360,willifr02
2,2005-06,Cleveland Cavaliers,NBA,4621800,willifr02
3,2006-07,Cleveland Cavaliers,NBA,5828090,willifr02
4,2007-08,Cleveland Cavaliers,NBA,13041250,willifr02
5,2008-09,Cleveland Cavaliers,NBA,14410581,willifr02
6,2009-10,Cleveland Cavaliers,NBA,15779912,willifr02
7,2010-11,Miami Heat,NBA,14500000,willifr02
8,2011-12,Miami Heat,NBA,16022500,willifr02
9,2012-13,Miami Heat,NBA,17545000,willifr02


### Get Salary Data for Every Player

In [24]:
all_players = np.unique(histor_raptor_player["player_id"])
len(all_players)

3591

In [25]:
test_id

'willifr02'

In [None]:
%%time
dfs = []

issues = []
min_salary_issues = []

url_path_beg = "https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2Fplayers%2F"
url_path_mid = "%2F"
url_path_end = ".html&div=div_all_salaries"
count = 0

for test_id in tqdm(all_players):
    time.sleep(3)
    try:
        test_url  = url_path_beg + test_id[0] + url_path_mid + test_id + url_path_end
        url = test_url
#         print("Player:", test_id)
        # get HTML data
#         html = urlopen(url)
        page = requests.get(url)
        # if it hits rate-limt, wait and retry.
        if 'Retry-After' in page.headers:
            time.sleep(page.headers['Retry-After'] + 1)
            page = requests.get(url)
        html = page.text
        # get bs4 obj from HTML
        soup = BeautifulSoup(html, features = "lxml")

        # get all headers of table as a list
        headers = [th.getText() for th in soup.findAll('tr', limit=1)[0].findAll('th')]
#         print(headers)
        # get all headers of table as a list
        rows = soup.findAll('tr')[1:]
        rows_data = [[td.getText() for td in rows[i].findAll('td')]
                            for i in range(len(rows))]

        # change money to numeric
        for row in rows_data:
            try:
                row[-1] = int(row[-1].strip('$').replace(',',''))
            except:
#                 print("\"{}\" couldn't be converted to numeric".format(row[-1]))
#                 print("replaced with 0")
                min_salary_issues.append(test_id)
                row[-1] = 0
        df = pd.DataFrame(rows_data)
        df.columns = headers[1:]
        df[headers[0]] = [[td.getText() for td in rows[i].findAll('th')][0] for i in range(len(rows))]
        df = df[headers]

        df["player_id"] = [test_id]* df.shape[0]
        dfs.append(df)
    except:
        issues.append(test_id)
        print("\t\tThere was an issue with {}".format(test_id))
    count += 1
    if count % 100 == 0:
        print("Finished {} of {}".format(count,len(all_players) ))

  0%|          | 0/3591 [00:00<?, ?it/s]

		There was an issue with abdulza01
		There was an issue with abernto01
		There was an issue with adamsdo01
		There was an issue with akoonde01
		There was an issue with aleksch01
		There was an issue with allenlu01
		There was an issue with allenra01
		There was an issue with allrela01
		There was an issue with allumda01
		There was an issue with alumape01
		There was an issue with anderan02
		There was an issue with anderdw01
		There was an issue with anderje01
		There was an issue with anderjj01
		There was an issue with anderki01
		There was an issue with andermi01
Finished 100 of 3591
		There was an issue with architi01
		There was an issue with ardji01
		There was an issue with arlaujo01
		There was an issue with armstta01
		There was an issue with austike01
		There was an issue with averibi01
		There was an issue with awtrede01
		There was an issue with baileca01
		There was an issue with bailegu01
		There was an issue with bakerla01
		There was an issue with bakerma01
		There w

		There was an issue with gillihe01
		There was an issue with givenja01
		There was an issue with glennmi01
		There was an issue with gondrgl01
		There was an issue with goodrga01
		There was an issue with graceri01
		There was an issue with grahaor01
		There was an issue with grangst01
		There was an issue with gravebu01
		There was an issue with grayev01
Finished 1200 of 3591
		There was an issue with grayle01
		There was an issue with greenke01
		There was an issue with greenmi01
		There was an issue with greenst01
		There was an issue with greento01
		There was an issue with greigjo01
		There was an issue with griffgr01
		There was an issue with griffpa01
		There was an issue with grossbo01
		There was an issue with grundan01
		There was an issue with hackeru01
		There was an issue with haganas01
		There was an issue with hagangl01
		There was an issue with hamilro01
		There was an issue with hansegl01
		There was an issue with hansela01
		There was an issue with hansore01
		There 

		There was an issue with nilesmi01
		There was an issue with nixdy01
		There was an issue with normaco01
		There was an issue with norrisy01
		There was an issue with norwowi01
		There was an issue with nuttde01
Finished 2400 of 3591
		There was an issue with oleynfr01
		There was an issue with olivejo01
		There was an issue with owensed01
		There was an issue with owensto01
		There was an issue with pacejo01
		There was an issue with parkeso01
		There was an issue with paspaza01
		There was an issue with patrimy01
		There was an issue with paultbi01
		There was an issue with peckwi01
		There was an issue with pellosa01
		There was an issue with penigde01
Finished 2500 of 3591
		There was an issue with perrycu01
		There was an issue with pheglro01
		There was an issue with phelpde01
		There was an issue with philled01
		There was an issue with pietkst01


In [36]:
dfs

[    Season                    Team   Lg   Salary  player_id
 0  1990-91  Portland Trail Blazers  NBA   395000  abdelal01
 1  1991-92  Portland Trail Blazers  NBA   494000  abdelal01
 2  1992-93          Boston Celtics  NBA   500000  abdelal01
 3  1993-94          Boston Celtics  NBA   805000  abdelal01
 4  1994-95        Sacramento Kings  NBA   650000  abdelal01
 5   Career     (may be incomplete)       2844000  abdelal01,
     Season                 Team   Lg   Salary  player_id
 0  1984-85   Los Angeles Lakers  NBA  1530000  abdulka01
 1  1985-86   Los Angeles Lakers  NBA  2030000  abdulka01
 2  1987-88   Los Angeles Lakers  NBA  2000000  abdulka01
 3  1988-89   Los Angeles Lakers  NBA  3000000  abdulka01
 4   Career  (may be incomplete)       8560000  abdulka01,
     Season                 Team   Lg    Salary  player_id
 0  1990-91       Denver Nuggets  NBA   1660000  abdulma02
 1  1991-92       Denver Nuggets  NBA   2008000  abdulma02
 2  1992-93       Denver Nuggets  NBA   235800

In [37]:
len(issues)

670

In [38]:
len(min_salary_issues)

353

In [39]:
len(np.unique(min_salary_issues))

272

In [41]:
len(dfs) + 670

3591

670 players did not have salary data on Basketball Reference. Of the remaining players, 272 players had less than the minumum tracked value for a salary. This is typically indicative of players on 10-day contracts.

In [42]:
dfs

[    Season                    Team   Lg   Salary  player_id
 0  1990-91  Portland Trail Blazers  NBA   395000  abdelal01
 1  1991-92  Portland Trail Blazers  NBA   494000  abdelal01
 2  1992-93          Boston Celtics  NBA   500000  abdelal01
 3  1993-94          Boston Celtics  NBA   805000  abdelal01
 4  1994-95        Sacramento Kings  NBA   650000  abdelal01
 5   Career     (may be incomplete)       2844000  abdelal01,
     Season                 Team   Lg   Salary  player_id
 0  1984-85   Los Angeles Lakers  NBA  1530000  abdulka01
 1  1985-86   Los Angeles Lakers  NBA  2030000  abdulka01
 2  1987-88   Los Angeles Lakers  NBA  2000000  abdulka01
 3  1988-89   Los Angeles Lakers  NBA  3000000  abdulka01
 4   Career  (may be incomplete)       8560000  abdulka01,
     Season                 Team   Lg    Salary  player_id
 0  1990-91       Denver Nuggets  NBA   1660000  abdulma02
 1  1991-92       Denver Nuggets  NBA   2008000  abdulma02
 2  1992-93       Denver Nuggets  NBA   235800

In [43]:
player_salaries = pd.concat(dfs)

In [44]:
player_salaries

Unnamed: 0,Season,Team,Lg,Salary,player_id
0,1990-91,Portland Trail Blazers,NBA,395000,abdelal01
1,1991-92,Portland Trail Blazers,NBA,494000,abdelal01
2,1992-93,Boston Celtics,NBA,500000,abdelal01
3,1993-94,Boston Celtics,NBA,805000,abdelal01
4,1994-95,Sacramento Kings,NBA,650000,abdelal01
...,...,...,...,...,...
2,2018-19,Los Angeles Clippers,NBA,1544951,zubaciv01
3,2019-20,Los Angeles Clippers,NBA,6481482,zubaciv01
4,2020-21,Los Angeles Clippers,NBA,7000000,zubaciv01
5,2021-22,Los Angeles Clippers,NBA,7518518,zubaciv01


In [53]:
len(player_salaries.player_id.unique())

2921

In [60]:
sorted(player_salaries['Team'].unique())

['',
 '(may be incomplete)',
 'Atlanta Hawks',
 'Boston Celtics',
 'Brooklyn Nets',
 'Charlotte Bobcats',
 'Charlotte Hornets',
 'Chicago Bulls',
 'Cleveland Cavaliers',
 'Dallas Mavericks',
 'Denver Nuggets',
 'Detroit Pistons',
 'Golden State Warriors',
 'Houston Rockets',
 'Indiana Pacers',
 'Kansas City Kings',
 'Los Angeles Clippers',
 'Los Angeles Lakers',
 'Memphis Grizzlies',
 'Miami Heat',
 'Milwaukee Bucks',
 'Minnesota Timberwolves',
 'New Jersey Nets',
 'New Orleans Hornets',
 'New Orleans Pelicans',
 'New Orleans/Oklahoma City Hornets',
 'New York Knicks',
 'Oklahoma City Thunder',
 'Orlando Magic',
 'Philadelphia 76ers',
 'Phoenix Suns',
 'Portland Trail Blazers',
 'Sacramento Kings',
 'San Antonio Spurs',
 'Seattle SuperSonics',
 'Toronto Raptors',
 'Utah Jazz',
 'Vancouver Grizzlies',
 'Washington Bullets',
 'Washington Wizards']

In [63]:
player_salaries[player_salaries['Team'] == '']

Unnamed: 0,Season,Team,Lg,Salary,player_id
0,1984-85,,NBA,600000,cartwbi01
0,2013-14,,NBA,524616,mccalra01
0,1984-85,,NBA,450000,webstma01
0,1984-85,,NBA,120000,williro02


There are 2921 players with salary data.
a
Some data in `new_salaries` don't have team information. A little digging yields the following:

* Bill Cartwright missed 1984-1985 season with foot injuries but was contracted to the Knicks (NYK) so we should fill that in. 
* Marvin Webster missed the 1984-85 season with illness but was contracted to the Knicks (NYK)(rough year for the Knicks).
* Rob Williams missed the 1984-85 season but was contracted to the Denver Nuggets (DEN). 
* Finally, Ray McCallum played for the Sacramento Kings (SAC). 

Thus, we can fill this in.

In [65]:
player_salaries.loc[player_salaries['Team'] == '', 'Team'] = ['New York Knicks', 'Sacramento Kings', 'New York Knicks', 'Denver Nuggets']

In [72]:
player_salaries[player_salaries['Team'] == '']

Unnamed: 0,Season,Team,Lg,Salary,player_id


In [73]:
player_salaries.to_csv("./raw/player_salaries.csv", index=False)

### Players with & without salaries

In [45]:
players_no_salary = histor_raptor_player[histor_raptor_player["player_id"].isin(issues)].sort_values(by = ["season"])

In [46]:
players_no_salary

Unnamed: 0,player_name,player_id,season,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
33,Zaid Abdul-Aziz,abdulza01,1977,441,195,-1.926415,-0.368835,-2.295249,0.044927,0.044927,0.000000,-1.817927,-1.624327,-3.442253,-0.751147
11247,Andre McCarter,mccaran01,1977,1626,725,-1.492848,-2.317706,-3.810554,-0.391915,-0.391915,0.000000,-1.922424,-2.766629,-4.689053,-0.104074
3020,Cornelius Cash,cashco01,1977,112,49,-3.594243,-1.189121,-4.783363,-0.051093,-0.051093,0.000000,-3.125765,-1.594401,-4.720165,0.550192
11271,Ted McClain,mcclate01,1977,5070,2172,-0.161863,0.269302,0.107439,3.197087,2.768673,0.428414,-0.348780,0.635632,0.286852,0.359923
11323,Paul McCracken,mccrapa01,1977,251,119,-3.008264,-0.452885,-3.461149,-0.043557,-0.043557,0.000000,-2.965984,-0.568620,-3.534603,0.949153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10406,Zach Lofton,loftoza01,2019,9,4,-16.066038,12.480421,-3.585617,-0.001840,-0.001840,0.000000,-8.381958,8.192370,-0.189588,8.076561
4263,Tyler Davis,davisty01,2019,2,1,-40.825921,61.113882,20.287961,0.011423,0.011423,0.000000,-26.935305,42.989063,16.053758,-2.873834
182,DeVaughn Akoon-Purcell,akoonde01,2019,49,22,-2.229499,-1.019840,-3.249339,-0.005556,-0.005556,0.000000,-1.262366,0.661520,-0.600846,-0.894708
13873,Shamorie Ponds,pondssh01,2020,23,11,12.491487,-29.295219,-16.803733,-0.077846,-0.077846,0.000000,8.207066,-23.489402,-15.282336,-1.328036


In [47]:
players_no_salary.describe()

Unnamed: 0,season,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
count,1648.0,1648.0,1648.0,1648.0,1648.0,1648.0,1648.0,1648.0,1648.0,1648.0,1648.0,1648.0,1648.0
mean,1981.996359,2296.702063,1046.037621,-2.237456,-0.752681,-2.990137,0.991085,0.931063,0.060023,-2.292308,-0.988353,-3.280661,0.388731
std,6.450878,2078.544534,939.74857,4.206833,2.848097,5.306096,2.100115,1.969226,0.264837,3.828634,2.627264,5.252201,0.982537
min,1977.0,2.0,1.0,-43.263751,-43.212808,-56.491613,-2.800014,-2.800014,-0.470393,-38.768411,-31.747427,-57.005728,-2.873834
25%,1978.0,342.75,158.0,-3.525301,-1.725835,-4.593628,-0.17307,-0.173123,0.0,-3.613269,-1.992242,-5.153557,-0.097153
50%,1980.0,1786.5,822.5,-1.655203,-0.748221,-2.31035,0.073011,0.072576,0.0,-1.862145,-0.928048,-2.662386,0.224764
75%,1983.0,3837.5,1750.25,-0.183502,0.178194,-0.44599,1.489196,1.429052,0.0,-0.3554,0.096205,-0.587453,0.681101
max,2021.0,9271.0,4114.0,35.634015,61.113882,37.776924,10.724776,10.041522,3.327882,32.053844,42.989063,36.563969,11.219749


In [48]:
players_with_salary = histor_raptor_player[~histor_raptor_player["player_id"].isin(issues)].sort_values(by = ["season"])

In [49]:
players_with_salary.describe()

Unnamed: 0,season,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
count,17511.0,17511.0,17511.0,17511.0,17511.0,17511.0,17511.0,17511.0,17511.0,17511.0,17511.0,17511.0,17510.0
mean,2003.785963,2796.446348,1392.850266,-1.192706,-0.297486,-1.490192,2.074042,1.872933,0.201109,-1.190748,-0.49082,-1.681568,0.178429
std,11.82792,2023.090908,1004.500506,3.415726,2.424509,4.423251,3.545983,3.146504,0.609922,3.139113,2.268689,4.325828,0.808669
min,1977.0,1.0,0.0,-82.192072,-56.98252,-103.084801,-7.382978,-7.382978,-1.37652,-71.510086,-37.871745,-101.44265,-7.191955
25%,1994.0,948.0,473.5,-2.764912,-1.382578,-3.466068,-0.107835,-0.108322,0.0,-2.800236,-1.594956,-3.880139,-0.253424
50%,2005.0,2617.0,1304.0,-1.045328,-0.3677,-1.267597,0.66663,0.636578,0.0,-1.128172,-0.491278,-1.515482,0.082191
75%,2014.0,4379.0,2173.0,0.615331,0.75301,0.839577,3.275003,3.04767,0.090096,0.540014,0.683719,0.79476,0.502422
max,2022.0,8822.0,4270.0,53.228864,62.469205,72.622361,28.762877,24.355978,7.006934,42.890281,41.704602,49.106166,23.732063


In [50]:
players_with_salary[players_with_salary["mp"] <10]

Unnamed: 0,player_name,player_id,season,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
3612,Michael Cooper,coopemi01,1979,16,7,-5.462788,3.235257,-2.227531,0.001900,0.001900,0.0,-4.077774,2.382378,-1.695396,1.928041
9091,Charles Jones,jonesch01,1984,6,3,-24.663904,-3.683648,-28.347552,-0.042313,-0.042313,0.0,-21.478926,-1.359169,-22.838095,8.214553
6748,Claude Gregory,gregocl01,1986,4,2,-12.765384,17.215010,4.449626,0.007932,0.007932,0.0,-2.587644,15.673051,13.085407,8.241317
9968,Jeff Lamp,lampje01,1988,15,7,0.290932,-3.592323,-3.301391,-0.001970,-0.001970,0.0,-1.470122,-3.883349,-5.353471,0.047175
6066,Kenny Gattison,gattike01,1989,20,9,-8.614034,-2.526589,-11.140623,-0.039171,-0.039171,0.0,-8.738674,-1.988620,-10.727294,1.700383
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3205,Zylan Cheatham,cheatzy01,2022,11,5,-34.628376,-5.797543,-40.425919,-0.096653,-0.096653,0.0,-21.878324,-6.671908,-28.550232,0.564133
9942,Anthony Lamb,lamban01,2022,18,8,-6.806764,4.262697,-2.544067,0.000832,0.000832,0.0,-2.453993,-0.668941,-3.122934,-0.988902
9852,Arnoldas Kulboka,kulboar01,2022,11,5,-12.892988,-28.742160,-41.635148,-0.099222,-0.099222,0.0,-14.911416,-20.834041,-35.745457,0.026332
16643,Jon Teske,teskejo01,2022,19,8,-13.195683,-5.408641,-18.604324,-0.064202,-0.064202,0.0,-9.881658,-2.920075,-12.801733,-0.786854


In [51]:
players_no_salary[players_no_salary["mp"]>500]

Unnamed: 0,player_name,player_id,season,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
11247,Andre McCarter,mccaran01,1977,1626,725,-1.492848,-2.317706,-3.810554,-0.391915,-0.391915,0.000000,-1.922424,-2.766629,-4.689053,-0.104074
11271,Ted McClain,mcclate01,1977,5070,2172,-0.161863,0.269302,0.107439,3.197087,2.768673,0.428414,-0.348780,0.635632,0.286852,0.359923
11400,Jim McElroy,mcelrji01,1977,4583,2029,0.101299,-1.052118,-0.950819,1.854155,1.854155,0.000000,-0.285936,-0.952180,-1.238117,-0.483261
2954,Fred Carter,cartefr01,1977,2538,1112,-1.697633,-2.249363,-3.946995,-0.680078,-0.680078,0.000000,-1.824971,-2.195552,-4.020523,0.169372
11432,George McGinnis,mcginge01,1977,7682,3372,0.641613,1.252292,1.893905,7.996030,7.799804,0.196226,1.469291,1.367758,2.837049,0.217025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280,Randy Allen,allenra01,1990,1520,746,-3.464879,-1.076264,-4.541144,-0.684823,-0.684823,0.000000,-4.048901,-1.300064,-5.348965,0.452391
9061,Anthony Jones,jonesan01,1990,1300,653,-4.437675,0.068623,-4.369053,-0.542879,-0.534785,-0.008094,-4.535047,-0.549730,-5.084777,0.621497
9552,Warren Kidd,kiddwa01,1994,1783,884,-2.107090,-1.477960,-3.585051,-0.378412,-0.378412,0.000000,-2.674652,-1.293625,-3.968277,0.457497
10502,Ryan Lorthridge,lorthry01,1995,1386,672,-1.306732,-1.656392,-2.963124,-0.072841,-0.072841,0.000000,-1.782613,-2.308495,-4.091108,-0.295026


### Other salary data sites
--- 
https://www.spotrac.com/nba/cba/minimum/