In [30]:
# Scraping NBA free-agent data from Spotrac
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

# Spotrac page listing NBA free agents; the first <table> on it is scraped below.
url = "https://www.spotrac.com/nba/free-agents/"


# chrome driver to scrape dynamic webpages
driver = webdriver.Chrome()
try:
    driver.get(url)
    page_source = driver.page_source
finally:
    # Always shut the browser down, even if the page load fails; otherwise
    # every run of this cell leaves a Chrome/chromedriver process behind.
    driver.quit()

# beautiful soup to parse the captured HTML
soup = BeautifulSoup(page_source, 'html.parser')

tables = soup.find_all('table')
if not tables:
    # Fail with a clear message instead of a bare IndexError on tables[0].
    raise RuntimeError(f"No <table> element found at {url}")

t = tables[0]

# Header labels: newlines become spaces and everything is lower-cased.
headers = [th.text.replace('\n', ' ').lower() for th in t.find_all('th')]

# One list of <td> texts per <tr>. The first row is skipped, and rows with at
# most one cell are discarded.
rows = [[td.text for td in tr.find_all('td')] for tr in t.find_all('tr')]
data = rows[1:]
final_data = [cells for cells in data if len(cells) > 1]

# Pair every row with the header labels and build the raw frame.
data_attrs = [dict(zip(headers, cells)) for cells in final_data]

df = pd.DataFrame(data_attrs)

In [31]:
# Show the frame exactly as scraped, before any cleaning.
df

Unnamed: 0,player (214),Unnamed: 2,pos.,age,exp,team,type,2023-24 aav
0,\nJames\nLeBron James\n,\n,PF,39.2,21,\n LAL\n,\nPlayer / $51.4M \n,"$49,511,644"
1,\nGeorge\nPaul George\n,\n,SG,33.8,14,\n LAC\n,\nPlayer / $48.8M \n,"$44,066,367"
2,\nThompson\nKlay Thompson\n,\n,SG,34.1,13,\n GSW\n,\nUFA : Bird \n,"$37,980,720"
3,\nHarris\nTobias Harris\n,\n,PF,31.7,13,\n PHI\n,\nUFA : Bird \n,"$36,000,000"
4,\nHarden\nJames Harden\n,\n,SG,34.5,14,\n LAC\n,\nUFA : Bird \n,"$34,320,000"
...,...,...,...,...,...,...,...,...
209,\nDowtin\nJeff Dowtin\n,\n,PG,26.8,3,\n PHI\n,\nRFA : Two-Way \n,-0
210,\nGilyard\nJacob Gilyard\n,\n,PG,25.7,2,\n BKN\n,\nRFA : Two-Way \n,-0
211,\nJackson\nQuenton Jackson\n,\n,G,25.5,2,\n IND\n,\nRFA : Two-Way \n,-0
212,\nWindler\nDylan Windler\n,\n,SF,27.5,5,\n ATL\n,\nRFA : Two-Way \n,-0


In [32]:
# Clean the DataFrame
# Rename columns by pattern rather than by exact text: the scraped player
# header embeds a count ('player (214)') and the salary header embeds a season
# ('2023-24 aav'). An exact-match rename silently does nothing once either
# changes, and the df['player'] lookup below would then raise KeyError.
df.columns = (
    df.columns
    .str.replace(r'^player \(\d+\)$', 'player', regex=True)
    .str.replace(r'^pos\.$', 'position', regex=True)
    .str.replace(r'^\d{4}-\d{2} aav$', 'salary', regex=True)
)

# Only the newlines are removed here; the value is still "LastFirst Last"
# (e.g. 'JamesLeBron James') and is untangled by the later cells.
df['player'] = df['player'].str.replace('\n', '', regex=False)

# Team and type cells are padded with newlines and spaces. Strip both so that
# no trailing blank is left behind (e.g. 'UFA : Bird ' -> 'UFA : Bird').
df['team'] = df['team'].str.replace('\n', '', regex=False).str.strip()
df['type'] = df['type'].str.replace('\n', '', regex=False).str.strip()

In [33]:
# Show the frame after the column renames and newline removal.
df

Unnamed: 0,player,Unnamed: 2,position,age,exp,team,type,salary
0,JamesLeBron James,\n,PF,39.2,21,LAL,Player / $51.4M,"$49,511,644"
1,GeorgePaul George,\n,SG,33.8,14,LAC,Player / $48.8M,"$44,066,367"
2,ThompsonKlay Thompson,\n,SG,34.1,13,GSW,UFA : Bird,"$37,980,720"
3,HarrisTobias Harris,\n,PF,31.7,13,PHI,UFA : Bird,"$36,000,000"
4,HardenJames Harden,\n,SG,34.5,14,LAC,UFA : Bird,"$34,320,000"
...,...,...,...,...,...,...,...,...
209,DowtinJeff Dowtin,\n,PG,26.8,3,PHI,RFA : Two-Way,-0
210,GilyardJacob Gilyard,\n,PG,25.7,2,BKN,RFA : Two-Way,-0
211,JacksonQuenton Jackson,\n,G,25.5,2,IND,RFA : Two-Way,-0
212,WindlerDylan Windler,\n,SF,27.5,5,ATL,RFA : Two-Way,-0


In [34]:
# Drop the header-less column (it only holds '\n') by name rather than by
# position. A positional drop of df.columns[1] is not idempotent: running the
# cell a second time would remove 'position' instead.
blank_columns = [col for col in df.columns if not str(col).strip()]
df.drop(columns=blank_columns, inplace=True)
df

Unnamed: 0,player,position,age,exp,team,type,salary
0,JamesLeBron James,PF,39.2,21,LAL,Player / $51.4M,"$49,511,644"
1,GeorgePaul George,SG,33.8,14,LAC,Player / $48.8M,"$44,066,367"
2,ThompsonKlay Thompson,SG,34.1,13,GSW,UFA : Bird,"$37,980,720"
3,HarrisTobias Harris,PF,31.7,13,PHI,UFA : Bird,"$36,000,000"
4,HardenJames Harden,SG,34.5,14,LAC,UFA : Bird,"$34,320,000"
...,...,...,...,...,...,...,...
209,DowtinJeff Dowtin,PG,26.8,3,PHI,RFA : Two-Way,-0
210,GilyardJacob Gilyard,PG,25.7,2,BKN,RFA : Two-Way,-0
211,JacksonQuenton Jackson,G,25.5,2,IND,RFA : Two-Way,-0
212,WindlerDylan Windler,SF,27.5,5,ATL,RFA : Two-Way,-0


In [35]:
def remove_after_space(name):
    """Return the first whitespace-delimited token of ``name``.

    Parameters
    ----------
    name : str
        Text such as ``'JamesLeBron James'``.

    Returns
    -------
    str
        Everything before the first run of whitespace (``'JamesLeBron'``).
        An empty or whitespace-only string yields ``''``. A non-string value
        (e.g. a missing cell) is returned unchanged instead of raising.
    """
    # Missing cells are not strings; pass them through untouched.
    if not isinstance(name, str):
        return name
    # split() with no argument collapses any whitespace and never yields empty
    # tokens, so an empty list means there was no visible text at all.
    parts = name.split()
    return parts[0] if parts else ''
# Keep only the leading "LastFirst" token of every player entry
# (e.g. 'JamesLeBron James' -> 'JamesLeBron').
df['player'] = df['player'].map(remove_after_space)

In [36]:
# Show the frame with 'player' reduced to its first token.
df

Unnamed: 0,player,position,age,exp,team,type,salary
0,JamesLeBron,PF,39.2,21,LAL,Player / $51.4M,"$49,511,644"
1,GeorgePaul,SG,33.8,14,LAC,Player / $48.8M,"$44,066,367"
2,ThompsonKlay,SG,34.1,13,GSW,UFA : Bird,"$37,980,720"
3,HarrisTobias,PF,31.7,13,PHI,UFA : Bird,"$36,000,000"
4,HardenJames,SG,34.5,14,LAC,UFA : Bird,"$34,320,000"
...,...,...,...,...,...,...,...
209,DowtinJeff,PG,26.8,3,PHI,RFA : Two-Way,-0
210,GilyardJacob,PG,25.7,2,BKN,RFA : Two-Way,-0
211,JacksonQuenton,G,25.5,2,IND,RFA : Two-Way,-0
212,WindlerDylan,SF,27.5,5,ATL,RFA : Two-Way,-0


In [37]:
def move_chars_to_back(name):
    """Rotate a concatenated "LastFirst" token into "First Last".

    A *boundary* is an uppercase character that is either the first character
    of ``name`` or directly follows a lowercase character. The string is cut
    at the second boundary; the tail is moved to the front and joined to the
    head with a single space.

    Parameters
    ----------
    name : str
        Token such as ``'JamesLeBron'``.

    Returns
    -------
    str
        The rotated name (``'LeBron James'``), or ``name`` unchanged when
        fewer than two boundaries exist.

    Notes
    -----
    The cut is always made at the *second* boundary, so a surname that itself
    contains a lowercase-to-uppercase transition is split inside the surname
    (``'DeRozanDeMar'`` becomes ``'RozanDeMar De'``).
    """
    # Positions where a capitalised word starts. The i == 0 test comes first
    # so that name[i - 1] is never evaluated for the first character.
    boundaries = [
        pos
        for pos, letter in enumerate(name)
        if letter.isupper() and (pos == 0 or name[pos - 1].islower())
    ]

    # With fewer than two boundaries there is nothing to rotate.
    if len(boundaries) < 2:
        return name

    cut = boundaries[1]
    return name[cut:] + ' ' + name[:cut]

# Example usage: each "LastFirst" token should print as "First Last".
names = [
    "JamesLeBron",
    "GeorgePaul",
    "ThompsonKlay",
    "HarrisTobias",
    "HardenJames",
    "SiakamPascal",
]

for name in names:
    reordered = move_chars_to_back(name)
    print(reordered)

LeBron James
Paul George
Klay Thompson
Tobias Harris
James Harden
Pascal Siakam


In [38]:
# Turn every "LastFirst" token into "First Last", then show the result.
df['player'] = df['player'].map(move_chars_to_back)
df

Unnamed: 0,player,position,age,exp,team,type,salary
0,LeBron James,PF,39.2,21,LAL,Player / $51.4M,"$49,511,644"
1,Paul George,SG,33.8,14,LAC,Player / $48.8M,"$44,066,367"
2,Klay Thompson,SG,34.1,13,GSW,UFA : Bird,"$37,980,720"
3,Tobias Harris,PF,31.7,13,PHI,UFA : Bird,"$36,000,000"
4,James Harden,SG,34.5,14,LAC,UFA : Bird,"$34,320,000"
...,...,...,...,...,...,...,...
209,Jeff Dowtin,PG,26.8,3,PHI,RFA : Two-Way,-0
210,Jacob Gilyard,PG,25.7,2,BKN,RFA : Two-Way,-0
211,Quenton Jackson,G,25.5,2,IND,RFA : Two-Way,-0
212,Dylan Windler,SF,27.5,5,ATL,RFA : Two-Way,-0


In [39]:
# Write the cleaned table to FreeAgent.csv in the current working directory.
# NOTE(review): no `index` argument is passed, so pandas' default presumably
# also writes the row index as an unnamed first column — confirm that is wanted.
df.to_csv("FreeAgent.csv")