In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options


In [2]:
def set_chrome_options() -> "Options":
    """Build the Chrome options used for the Selenium driver.

    The browser is configured to run headless, with the sandbox and
    ``/dev/shm`` usage disabled, and with the
    ``profile.default_content_settings`` preference set to ``{"images": 2}``
    (presumably to stop images from loading -- TODO confirm against the
    Chrome preference reference).

    Returns:
        The configured ``Options`` instance, ready to be passed to
        ``webdriver.Chrome(options=...)``.
    """
    chrome_options = Options()
    for argument in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_options.add_argument(argument)

    # Build the prefs first and register them through the public API instead
    # of writing into the ``experimental_options`` mapping and mutating the
    # dict afterwards.
    chrome_prefs = {"profile.default_content_settings": {"images": 2}}
    chrome_options.add_experimental_option("prefs", chrome_prefs)
    return chrome_options


In [3]:
# Build the headless Chrome options and start the browser session.
# `driver` is the notebook-wide session that the later cells navigate with.
# NOTE(review): no `driver.quit()` is visible in this part of the notebook --
# confirm the session is closed once scraping is finished.
chrome_options = set_chrome_options()
driver = webdriver.Chrome(options=chrome_options)


In [5]:
# Page to scrape; loaded into the shared `driver` session.
url = 'https://www.basketball-reference.com/leaders/g_career.html'
driver.get(url)


In [6]:
# Grab the element with id 'all_tot' from the loaded page; the next cell
# walks its <tbody> rows.
all_tbl = driver.find_element(By.ID, 'all_tot')

# Show the element's visible text as a quick sanity check of what was found.
all_tbl.text


"NBA/ABA\nShare & Export\nRank Player G\n1. Robert Parish* 1611\n2. Kareem Abdul-Jabbar* 1560\n3. Vince Carter 1541\n4. Dirk Nowitzki 1522\n5. John Stockton* 1504\n6. Karl Malone* 1476\n7. Kevin Garnett* 1462\n8. Moses Malone* 1455\n9. Kevin Willis 1424\n10. Jason Terry 1410\n11. Tim Duncan* 1392\n12. Jason Kidd* 1391\n13. Reggie Miller* 1389\n14. Clifford Robinson 1380\n15. Kobe Bryant* 1346\n16. Paul Pierce* 1343\n17. Gary Payton* 1335\n18. LeBron James 1331\n19. Artis Gilmore* 1329\n20. Jamal Crawford 1327\nRank Player G\n21. Buck Williams 1307\n22. Andre Miller 1304\n23. Elvin Hayes* 1303\n24. Ray Allen* 1300\n25. Caldwell Jones 1299\n26. Mark Jackson 1296\n27. Derek Fisher 1287\n28. Sam Perkins 1286\n29. Charles Oakley 1282\n30. A.C. Green 1278\n31. Joe Johnson 1277\n32. Terry Porter 1274\n33. John Havlicek* 1270\n34. Otis Thorpe 1257\n35. Tony Parker 1254\n  Paul Silas 1254\n37. Julius Erving* 1243\n38. Hakeem Olajuwon* 1238\n39. Kyle Korver 1232\n40. Pau Gasol 1226\nRank Player 

In [13]:
# Collect every <tr> of the table body; only the first ten are parsed below.
rows = all_tbl.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')

top_10_list = []

for row in rows[:10]:
    print(row.text)

    # Look the row's <td> cells up once: every find_elements call is a
    # separate WebDriver query, so repeating it per field is wasted work.
    cells = row.find_elements(By.TAG_NAME, 'td')

    # Cell 0 is the rank, cell 1 the player (with a link to the player page),
    # cell 2 the leaderboard value.
    top_10_entry = {
        'rank': cells[0].text,
        'name': cells[1].text,
        'link': cells[1].find_element(By.TAG_NAME, 'a').get_attribute('href'),
        'value': cells[2].text,
    }

    top_10_list.append(top_10_entry)

top_10_list


1. Robert Parish* 1611
2. Kareem Abdul-Jabbar* 1560
3. Vince Carter 1541
4. Dirk Nowitzki 1522
5. John Stockton* 1504
6. Karl Malone* 1476
7. Kevin Garnett* 1462
8. Moses Malone* 1455
9. Kevin Willis 1424
10. Jason Terry 1410


[{'rank': '1.',
  'name': 'Robert Parish*',
  'link': 'https://www.basketball-reference.com/players/p/parisro01.html',
  'value': '1611'},
 {'rank': '2.',
  'name': 'Kareem Abdul-Jabbar*',
  'link': 'https://www.basketball-reference.com/players/a/abdulka01.html',
  'value': '1560'},
 {'rank': '3.',
  'name': 'Vince Carter',
  'link': 'https://www.basketball-reference.com/players/c/cartevi01.html',
  'value': '1541'},
 {'rank': '4.',
  'name': 'Dirk Nowitzki',
  'link': 'https://www.basketball-reference.com/players/n/nowitdi01.html',
  'value': '1522'},
 {'rank': '5.',
  'name': 'John Stockton*',
  'link': 'https://www.basketball-reference.com/players/s/stockjo01.html',
  'value': '1504'},
 {'rank': '6.',
  'name': 'Karl Malone*',
  'link': 'https://www.basketball-reference.com/players/m/malonka01.html',
  'value': '1476'},
 {'rank': '7.',
  'name': 'Kevin Garnett*',
  'link': 'https://www.basketball-reference.com/players/g/garneke01.html',
  'value': '1462'},
 {'rank': '8.',
  'name': 

In [16]:
def get_top_n_list(url, n=10):
    """Scrape the first ``n`` entries of the leaderboard table at ``url``.

    The page is loaded in the module-level ``driver``; the element with id
    ``all_tot`` is located and the ``<tr>`` rows of its ``<tbody>`` are read
    in document order.

    Args:
        url: Address of the leaderboard page to load.
        n: Maximum number of entries to return. ``None`` returns every data
            row; zero or a negative value returns an empty list.

    Returns:
        A list of dicts with the keys ``'rank'``, ``'name'``, ``'link'`` and
        ``'value'``. ``'link'`` is the ``href`` of the first ``<a>`` inside
        the name cell; the other values are the cells' text.

    Raises:
        Whatever the driver raises when the table, its body, or the link in
        a data row's name cell cannot be found.
    """
    driver.get(url)

    all_tbl = driver.find_element(By.ID, 'all_tot')

    rows = all_tbl.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')

    top_n_list = []

    for row in rows:
        if n is not None and len(top_n_list) >= n:
            break

        # One lookup per row instead of one per field: each find_elements
        # call is a separate WebDriver query.
        cells = row.find_elements(By.TAG_NAME, 'td')

        # Rows without the three data cells are skipped and do not count
        # towards n. The page text shows the "Rank Player G" header repeated
        # between data rows; presumably those rows live inside <tbody> and
        # carry no <td> cells -- TODO confirm against the page markup.
        if len(cells) < 3:
            continue

        top_n_list.append({
            'rank': cells[0].text,
            'name': cells[1].text,
            'link': cells[1].find_element(By.TAG_NAME, 'a').get_attribute('href'),
            'value': cells[2].text,
        })

    return top_n_list
    
# Run the scraper on the page used in the earlier cells and keep the first ten entries.
t10_list = get_top_n_list('https://www.basketball-reference.com/leaders/g_career.html', 10)

# Display the scraped entries as the cell output.
t10_list

[{'rank': '1.',
  'name': 'Robert Parish*',
  'link': 'https://www.basketball-reference.com/players/p/parisro01.html',
  'value': '1611'},
 {'rank': '2.',
  'name': 'Kareem Abdul-Jabbar*',
  'link': 'https://www.basketball-reference.com/players/a/abdulka01.html',
  'value': '1560'},
 {'rank': '3.',
  'name': 'Vince Carter',
  'link': 'https://www.basketball-reference.com/players/c/cartevi01.html',
  'value': '1541'},
 {'rank': '4.',
  'name': 'Dirk Nowitzki',
  'link': 'https://www.basketball-reference.com/players/n/nowitdi01.html',
  'value': '1522'},
 {'rank': '5.',
  'name': 'John Stockton*',
  'link': 'https://www.basketball-reference.com/players/s/stockjo01.html',
  'value': '1504'},
 {'rank': '6.',
  'name': 'Karl Malone*',
  'link': 'https://www.basketball-reference.com/players/m/malonka01.html',
  'value': '1476'},
 {'rank': '7.',
  'name': 'Kevin Garnett*',
  'link': 'https://www.basketball-reference.com/players/g/garneke01.html',
  'value': '1462'},
 {'rank': '8.',
  'name': 