In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

In [3]:
def set_chrome_options() -> "Options":
    """Build the Chrome options used by the Selenium driver.

    The returned options run Chrome headless and pass the ``--no-sandbox``
    and ``--disable-dev-shm-usage`` switches. A ``prefs`` experimental option
    is also set with ``profile.default_content_settings`` = ``{"images": 2}``
    (presumably this disables image loading -- confirm against Chrome docs).

    Returns:
        The configured ``Options`` instance, ready to pass to
        ``webdriver.Chrome(options=...)``.
    """
    chrome_options = Options()
    for switch in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_options.add_argument(switch)

    # Build the prefs dict completely before attaching it, so the options
    # object never holds a half-populated value.
    chrome_prefs = {"profile.default_content_settings": {"images": 2}}
    chrome_options.experimental_options["prefs"] = chrome_prefs
    return chrome_options

In [4]:
# Create the shared Chrome driver. `driver` is assigned at module level, so
# it is already a global that later cells and functions can read; no `global`
# declaration is needed here.
chrome_options = set_chrome_options()
driver = webdriver.Chrome(options=chrome_options)

In [5]:
def connect_to_url(url):
    """Point the module-level ``driver`` at ``url``.

    Args:
        url: Address handed to ``driver.get``.

    Returns:
        Whatever ``driver.get(url)`` returns.
    """
    response = driver.get(url)
    return response

# Page to scrape; it is loaded into the shared driver right away.
url = 'https://scores.nbcsports.com/golf/final.asp?tour=PGA'
connect_to_url(url)



In [7]:
def get_scoreboard_table(table_index=1):
    """Return one ``<table>`` element from the page loaded in ``driver``.

    Args:
        table_index: Position of the wanted table among all ``<table>``
            elements on the page. Defaults to 1 (the second table), which is
            the position this notebook has always read the scoreboard from.

    Returns:
        The element at ``table_index``.

    Raises:
        IndexError: If the page has no table at ``table_index`` (for example
            because the page failed to load or its layout changed).
    """
    tables = driver.find_elements(By.TAG_NAME, "table")
    try:
        return tables[table_index]
    except IndexError:
        # Same exception type as before, but say what was actually found.
        raise IndexError(
            f"no <table> at index {table_index}: page has {len(tables)} table element(s)"
        ) from None
    
# Grab the scoreboard table element from the page currently loaded in the driver.
table = get_scoreboard_table()

In [9]:
def get_table_rows(table):
    """Collect every ``<tr>`` element found under ``table``.

    Args:
        table: Element exposing ``find_elements`` (here, the scoreboard table).

    Returns:
        The result of ``table.find_elements(By.TAG_NAME, 'tr')``.
    """
    row_elements = table.find_elements(By.TAG_NAME, 'tr')
    return row_elements
    
# All <tr> elements of the scoreboard table; filtered and parsed in the next cell.
trs = get_table_rows(table)


In [11]:
%%time

# Rows whose class attribute is one of these are skipped (the class names
# suggest title / sub-title / column-header rows rather than player rows).
skipped_row_classes = ['shsTableTtlRow', 'shsTableSubttlRow', 'shsColTtlRow']

# Output keys, in the order of the cells they are read from: the first key
# comes from cell 1, the next from cell 2, and so on. Cell 0 is not read.
score_fields = ['name', 'round1', 'round2', 'round3', 'round4']

scoreboard = []

for tr in trs:
    if tr.get_attribute('class') in skipped_row_classes:
        continue

    tds = tr.find_elements(By.TAG_NAME, 'td')
    player_score = {
        field: tds[position].text
        for position, field in enumerate(score_fields, start=1)
    }
    scoreboard.append(player_score)

scoreboard


CPU times: user 878 ms, sys: 104 ms, total: 981 ms
Wall time: 16 s


[{'name': 'Jhonattan Vegas',
  'round1': '64',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'David Lipsky',
  'round1': '-7',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Danny Lee',
  'round1': '65',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Scott Stallings',
  'round1': '65',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Richy Werenski',
  'round1': '65',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Sam Burns',
  'round1': '-6',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Kramer Hickok',
  'round1': '66',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Doc Redman',
  'round1': '66',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Justin Thomas',
  'round1': '66',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Adam Hadwin',
  'round1': '-5',
  'round2': ' ',
  'round3': ' ',
  'round4': ' '},
 {'name': 'Davis Riley',
  'round1': '-

In [12]:
import pandas as pd



In [114]:
# One row per scraped player dict; the dict keys become the columns.
df = pd.DataFrame(scoreboard)

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,name,round1,round2,round3,round4
0,Jhonattan Vegas,64,,,
1,David Lipsky,-7,,,
2,Danny Lee,65,,,
3,Scott Stallings,65,,,
4,Richy Werenski,65,,,
...,...,...,...,...,...
139,Stephan Jaeger,76,,,
140,Cameron Percy,76,,,
141,Francesco Molinari,+5,,,
142,Charley Hoffman,+5,,,


In [115]:
# Index rows by player name. Reassign instead of using inplace=True: the
# result is the same frame contents, but the statement stays chainable and
# does not mutate an object that other names may still reference.
df = df.set_index('name')

In [116]:
# Distinct raw values found in the round1 column.
df.round1.unique()

array(['64', '-7', '65', '-6', '66', '-5', '67', '-4', '68', '-3', '69',
       '-2', '70', '-1', '71', 'E', '72', '+1', '73', '+2', '74', '+3',
       '75', '+4', '76', '+5', '78'], dtype=object)

In [117]:
def convert_to_plus_minus(score: str, par: int):
    """Express a stroke count as a score relative to par.

    Args:
        score: Stroke count as text, e.g. ``'64'``; parsed with ``int``.
        par: Par to subtract.

    Returns:
        ``int(score) - par`` (negative means under par).
    """
    strokes = int(score)
    return strokes - par

convert_to_plus_minus('64', 71)

-7

In [118]:
# Scratch check: int() parses explicitly signed strings such as '-5' and '+5'.
int('-5') + int('+5')

0

In [119]:
def convert_even():
    """Return the numeric score for an even-par round, which is 0."""
    even_par_score = 0
    return even_par_score

In [129]:
def clean_up_distributor(score, par):
    """Normalise one scraped score cell to an integer relative to par.

    Dispatches on the form of ``score``:

    * ``int``                      -> returned unchanged (already cleaned)
    * blank / whitespace-only text -> 0 (no score in that cell)
    * ``'E'``                      -> ``convert_even()``
    * text containing ``+``/``-``  -> ``int(text)`` (already relative to par)
    * any other text               -> ``convert_to_plus_minus(text, par)``

    Surrounding whitespace is ignored for every text form.

    Args:
        score: Raw cell value (``str``) or an already-converted ``int``.
        par: Par used to convert raw stroke counts.

    Returns:
        The score relative to par as an ``int``.

    Raises:
        TypeError: If ``score`` is neither ``int`` nor ``str``.
        ValueError: If the text is not one of the recognised forms and cannot
            be parsed by ``int``.
    """
    if isinstance(score, int):
        return score

    # Check the type before any substring test: `'+' in score` on a
    # non-iterable (None, float NaN, ...) would otherwise raise an unrelated
    # "argument of type ... is not iterable" TypeError.
    if not isinstance(score, str):
        raise TypeError(
            'method not implemented for score of type ' + type(score).__name__
        )

    text = score.strip()
    if text == '':
        # Covers ' ' as well as '' and longer runs of whitespace.
        return 0
    if text == 'E':
        return convert_even()
    if '+' in text or '-' in text:
        return int(text)
    return convert_to_plus_minus(text, par)
        
# Quick manual checks against par 71: even par, a signed to-par value,
# and a raw stroke count.
print(clean_up_distributor('E', 71))
print(clean_up_distributor('+7', 71))
print(clean_up_distributor('72', 71))

0
7
1


In [130]:
# Show df. NOTE(review): the saved output under this cell already contains
# converted integers even though update_columns is only called in the next
# cell, which suggests out-of-order execution -- confirm with
# Restart Kernel -> Run All.
df

Unnamed: 0_level_0,round1,round2,round3,round4
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jhonattan Vegas,-7,0,0,0
David Lipsky,-7,0,0,0
Danny Lee,-6,0,0,0
Scott Stallings,-6,0,0,0
Richy Werenski,-6,0,0,0
...,...,...,...,...
Stephan Jaeger,5,0,0,0
Cameron Percy,5,0,0,0
Francesco Molinari,+5,0,0,0
Charley Hoffman,+5,0,0,0


In [131]:
def update_columns(df, par):
    """Clean every column of ``df`` in place with ``clean_up_distributor``.

    Args:
        df: DataFrame whose columns hold raw or already-cleaned scores.
            It is modified in place.
        par: Par forwarded to ``clean_up_distributor`` for each value.

    Returns:
        None.
    """
    def _clean(value):
        return clean_up_distributor(value, par)

    for col in df.columns:
        cleaned = df[col].apply(_clean)
        df[col] = cleaned
        
# Convert every column of df in place, using 71 as par.
update_columns(df, 71)

In [132]:
# Show df after the in-place conversion above.
df

Unnamed: 0_level_0,round1,round2,round3,round4
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jhonattan Vegas,-7,0,0,0
David Lipsky,-7,0,0,0
Danny Lee,-6,0,0,0
Scott Stallings,-6,0,0,0
Richy Werenski,-6,0,0,0
...,...,...,...,...
Stephan Jaeger,5,0,0,0
Cameron Percy,5,0,0,0
Francesco Molinari,5,0,0,0
Charley Hoffman,5,0,0,0


In [137]:
def create_total_col(df):
    """Add a ``total`` column holding the sum of the four round columns.

    Args:
        df: DataFrame with ``round1`` .. ``round4`` columns. It is modified
            in place.

    Returns:
        The same ``df`` object, now with a ``total`` column.
    """
    round_columns = ('round1', 'round2', 'round3', 'round4')

    # Accumulate left to right, matching round1 + round2 + round3 + round4.
    running_total = df[round_columns[0]]
    for column in round_columns[1:]:
        running_total = running_total + df[column]

    df['total'] = running_total
    return df

In [138]:
# Add the 'total' column; create_total_col returns the frame it was given.
df = create_total_col(df)

In [139]:
# Show df including the new 'total' column.
df

Unnamed: 0_level_0,round1,round2,round3,round4,total
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Jhonattan Vegas,-7,0,0,0,-7
David Lipsky,-7,0,0,0,-7
Danny Lee,-6,0,0,0,-6
Scott Stallings,-6,0,0,0,-6
Richy Werenski,-6,0,0,0,-6
...,...,...,...,...,...
Stephan Jaeger,5,0,0,0,5
Cameron Percy,5,0,0,0,5
Francesco Molinari,5,0,0,0,5
Charley Hoffman,5,0,0,0,5
