In [2]:
import selenium
import pandas as pd
import numpy as np

In [3]:
%cd /Users/genevieveberent/Desktop/Zak_Portfolio/PGA_Masters

/Users/genevieveberent/Desktop/Zak_Portfolio/PGA_Masters


In [4]:
## Open the Chrome browser of 2018 Fairways in Regulation stats

driver = webdriver.Chrome()
driver.get('https://2018.masters.com/en_US/scores/stats/fir.html')

In [7]:
## Get a list of the player names 

fir2018_players = []

## Compare xpaths to iterate over all players
## first xpath: //*[@id="table-8655"]/tbody/tr[1]/td[1]/a
## last xpath: //*[@id="table-8655"]/tbody/tr[87]/td[1]/a

for x in range(88):
    path = '//*[@id="table-8655"]/tbody/tr[' + str(x) + ']/td[1]/a'
    fir_players = driver.find_elements_by_xpath(path)

    for player in fir_players:
        fir2018_players.append(player.text)

In [8]:
print(len(fir2018_players))
print(fir2018_players[:3])
print(fir2018_players[-3:])

87
['Bernhard Langer', 'Ryan Moore', 'Bubba Watson']
['Austin Cook', 'Trevor Immelman', 'Harry Ellis (A)']


In [11]:
## Get the Fairways in Regulation stats in the same order that the players were in

fir2018_stats = []

## Get the stats from the class method instead of xpath here

fir_stats = driver.find_elements_by_class_name('col6')

for stat in fir_stats:
    fir2018_stats.append(stat.text)


In [12]:
print(len(fir2018_stats))
print(fir2018_stats[:3])
print(fir2018_stats[-3:])

88
['TOTAL', '48 / 56 = 85.71%', '47 / 56 = 83.93%']
['13 / 28 = 46.43%', '10 / 28 = 35.71%', '8 / 28 = 28.57%']


In [13]:
## Delete the 'TOTAL' element from the list

del fir2018_stats[0]

print(len(fir2018_stats))
print(fir2018_stats[:3])

87
['48 / 56 = 85.71%', '47 / 56 = 83.93%', '47 / 56 = 83.93%']


In [14]:
## Create a DataFrame of the Fairways in Regulation
fir2018 = pd.DataFrame.from_dict({'Player':fir2018_players, 'FIR':fir2018_stats})

In [15]:
fir2018.head(3)

Unnamed: 0,Player,FIR
0,Bernhard Langer,48 / 56 = 85.71%
1,Ryan Moore,47 / 56 = 83.93%
2,Bubba Watson,47 / 56 = 83.93%


In [16]:
## To be able to analyze, I only want the % as a float, luckily it will always be 5 elements long in the same position

fir2018['FIR'] = fir2018['FIR'].map(lambda x:str(x)[-6:-1])

In [17]:
fir2018.head(3)

Unnamed: 0,Player,FIR
0,Bernhard Langer,85.71
1,Ryan Moore,83.93
2,Bubba Watson,83.93


In [18]:
fir2018['FIR Rank'] = fir2018['FIR'].rank(method='min', ascending=False)

In [19]:
fir2018.head(3)

Unnamed: 0,Player,FIR,FIR Rank
0,Bernhard Langer,85.71,1.0
1,Ryan Moore,83.93,2.0
2,Bubba Watson,83.93,2.0


In [20]:
## Looks good to me

fir2018.to_csv('2018_FIR.csv')

## Greens in Regulation

In [21]:
driver.get('https://2018.masters.com/en_US/scores/stats/gir.html')

In [24]:
## Get the players names in the order listed on the Greens in Regulation page

gir2018_players = []

## Compare xpaths to iterate over all players
## //*[@id="table-4750"]/tbody/tr[1]/td[1]/a
## //*[@id="table-4750"]/tbody/tr[87]/td[1]/a

for x in range(88):
    path = '//*[@id="table-4750"]/tbody/tr[' + str(x) + ']/td[1]/a'
    gir_player_names = driver.find_elements_by_xpath(path)

    for player in gir_player_names:
        gir2018_players.append(player.text)

In [25]:
## Check that everything lines up correctly

print(len(gir2018_players))
print(gir2018_players[:3])
print(gir2018_players[-3:])

87
['Bubba Watson', 'Justin Thomas', 'Justin Rose']
['Sandy Lyle', 'Harry Ellis (A)', 'Larry Mize']


In [26]:
## Get the Greens in Regulation % in the same order as the players

gir2018_stats = []

## Going to get by xpath to avoid having to delete the first element
## xpath of first: //*[@id="table-4750"]/tbody/tr[1]/td[6]
## xpath of last: //*[@id="table-4750"]/tbody/tr[87]/td[6]


for i in range(88):
    path = '//*[@id="table-4750"]/tbody/tr[' + str(i) + ']/td[6]'
    gir_stats = driver.find_elements_by_xpath(path)
    for stat in gir_stats:
        gir2018_stats.append(stat.text)

In [27]:
## Make sure we're all good with the website

print(len(gir2018_stats))
print(gir2018_stats[:3])
print(gir2018_stats[-3:])

87
['56 / 72 = 77.78%', '53 / 72 = 73.61%', '53 / 72 = 73.61%']
['14 / 36 = 38.89%', '12 / 36 = 33.33%', '11 / 36 = 30.56%']


In [28]:
## Create the Greens In Regulations dataframe

gir2018 = pd.DataFrame.from_dict({'Player':gir2018_players, 'GIR':gir2018_stats})

In [29]:
gir2018.head(3)

Unnamed: 0,Player,GIR
0,Bubba Watson,56 / 72 = 77.78%
1,Justin Thomas,53 / 72 = 73.61%
2,Justin Rose,53 / 72 = 73.61%


In [30]:
## Get just the percent in a column

gir2018['GIR'] = gir2018['GIR'].map(lambda x:str(x)[-6:-1])

In [31]:
gir2018.head(2)

Unnamed: 0,Player,GIR
0,Bubba Watson,77.78
1,Justin Thomas,73.61


In [32]:
## Add a column that ranks the players by their GIR percent

gir2018['GIR Rank'] = gir2018['GIR'].rank(method='min', ascending=False)

In [33]:
gir2018.head(3)

Unnamed: 0,Player,GIR,GIR Rank
0,Bubba Watson,77.78,1.0
1,Justin Thomas,73.61,2.0
2,Justin Rose,73.61,2.0


In [34]:
## Save the dataframe to my local computer

gir2018.to_csv('2018_GIR.csv')

## Driving Distance Stats

In [35]:
## Access the Driving Distance Average page

driver.get('https://2018.masters.com/en_US/scores/stats/drives_avg.html')

In [36]:
## Get the player names in the listed order

dist2018_players = []

## First xpath: //*[@id="table-4838"]/tbody/tr[1]/td[1]/a
## Last xpath: //*[@id="table-4838"]/tbody/tr[87]/td[1]/a

for i in range(88):
    path = '//*[@id="table-4838"]/tbody/tr[' + str(i) + ']/td[1]/a'
    dist_player_names = driver.find_elements_by_xpath(path)
    for player in dist_player_names:
        dist2018_players.append(player.text)

In [37]:
## Compare to website
print(len(dist2018_players))
print(dist2018_players[:3])
print(dist2018_players[-3:])

87
['Tommy Fleetwood', 'Dustin Johnson', 'Bubba Watson']
['Mike Weir', "Mark O'Meara", 'Larry Mize']


In [38]:
## In that same order, get the driving distance of each player

dist2018_stats = []

## first xpath: //*[@id="table-4838"]/tbody/tr[1]/td[6]
## last xpath: //*[@id="table-4838"]/tbody/tr[87]/td[6]

for i in range(88):
    path = '//*[@id="table-4838"]/tbody/tr[' + str(i) + ']/td[6]'
    dist_stats = driver.find_elements_by_xpath(path)
    for stat in dist_stats:
        dist2018_stats.append(stat.text)

In [39]:
print(len(dist2018_stats))
print(dist2018_stats[:3])
print(dist2018_stats[-3:])

87
['310.00', '304.38', '303.75']
['270.00', '268.00', '252.25']


In [40]:
## Create the Driving Distance Average dataframe

dist2018 = pd.DataFrame.from_dict({'Player':dist2018_players, 'dist':dist2018_stats})

In [41]:
dist2018.head(3)

Unnamed: 0,Player,dist
0,Tommy Fleetwood,310.0
1,Dustin Johnson,304.38
2,Bubba Watson,303.75


In [42]:
dist2018['dist Rank'] = dist2018['dist'].rank(method='min', ascending=False)

In [43]:
dist2018.head(3)

Unnamed: 0,Player,dist,dist Rank
0,Tommy Fleetwood,310.0,1.0
1,Dustin Johnson,304.38,4.0
2,Bubba Watson,303.75,5.0


In [44]:
## Save to local machine

dist2018.to_csv('dist_2019.csv')

## Putting Stats

In [45]:
driver.get('https://2018.masters.com/en_US/scores/stats/putts.html')

In [46]:
## Get player names

putt2018_players = []

## first xpath: //*[@id="table-4749"]/tbody/tr[1]/td[1]/a
## last xpath: //*[@id="table-4749"]/tbody/tr[87]/td[1]/a

for i in range(88):
    path = '//*[@id="table-4749"]/tbody/tr[' + str(i) + ']/td[1]/a'
    putt_player_names = driver.find_elements_by_xpath(path)
    for name in putt_player_names:
        putt2018_players.append(name.text)

In [47]:
print(len(putt2018_players))
print(putt2018_players[:3])
print(putt2018_players[-3:])

87
['Sandy Lyle', 'Patrick Reed', 'Jose Maria Olazabal']
['Patrick Cantlay', 'Angel Cabrera', 'Yuxin Lin (A)']


In [48]:
## Get putting stats

putt2018_stats = []

## first xpath: //*[@id="table-4749"]/tbody/tr[1]/td[6]
## last xpath: //*[@id="table-4749"]/tbody/tr[87]/td[6]

for i in range(88):
    path = '//*[@id="table-4749"]/tbody/tr[' + str(i) + ']/td[6]'
    putt_stats = driver.find_elements_by_xpath(path)
    for stat in putt_stats:
        putt2018_stats.append(stat.text)

In [49]:
print(len(putt2018_stats))
print(putt2018_stats[:3])
print(putt2018_stats[-3:])

87
['1.42 (1)', '1.44 (2)', '1.47']
['1.83 (5)', '1.83 (5)', '1.94 (6)']


In [50]:
## Create the putting dataframe

putt2018 = pd.DataFrame.from_dict({'Player':putt2018_players, 'Putting':putt2018_stats})

In [51]:
putt2018.head(3)

Unnamed: 0,Player,Putting
0,Sandy Lyle,1.42 (1)
1,Patrick Reed,1.44 (2)
2,Jose Maria Olazabal,1.47


In [53]:
## Get rid of the number of 3 putts, in ()

putt2018['Putting'] = putt2018['Putting'].map(lambda x:str(x)[:4])

In [54]:
putt2018.head(3)

Unnamed: 0,Player,Putting
0,Sandy Lyle,1.42
1,Patrick Reed,1.44
2,Jose Maria Olazabal,1.47


In [55]:
## Rank the players by their average number of putts

putt2018['Putting Rank'] = putt2018['Putting'].rank(method='min')

In [57]:
putt2018.head(3)

Unnamed: 0,Player,Putting,Putting Rank
0,Sandy Lyle,1.42,1.0
1,Patrick Reed,1.44,2.0
2,Jose Maria Olazabal,1.47,3.0


In [58]:
## Save

putt2018.to_csv('2018_Putting.csv')

## Get where each player finished in the tournament

In [59]:
## The names didn't align on the masters website used before, stats page used 'Patrick Reed', leaderboard used 'P. Reed'

driver.get('https://www.pga.com/events/masters/leaderboard/2018')

In [60]:
## Get the player names

finish2018_players_first = []
finish2018_players_last = []

## Find these by the class name, the first and last names are in different locations

finish2018_first = driver.find_elements_by_class_name('leaderboard__player__name--firstname')
finish2018_last = driver.find_elements_by_class_name('leaderboard__player__name--lastname')

for first in finish2018_first:
    finish2018_players_first.append(first.text)
    
for last in finish2018_last:
    finish2018_players_last.append(last.text)

In [61]:
print(len(finish2018_players_first))
print(finish2018_players_first[:3])
print(finish2018_players_first[-3:])

87
['Patrick', 'Rickie', 'Jordan']
['Sergio', 'Matt', 'Harry']


In [62]:
print(len(finish2018_players_last))
print(finish2018_players_last[:3])
print(finish2018_players_last[-3:])

87
['Reed', 'Fowler', 'Spieth']
['Garcia', 'Parziale', 'Ellis']


In [63]:
## Get the position that the player finished in the tournament

finish2018_position = []

finish_position = driver.find_elements_by_class_name('leaderboard__player__item--pos')

for pos in finish_position:
    finish2018_position.append(pos.text)

In [64]:
print(len(finish2018_position))

87


In [65]:
## Get the score of each player for the tournament

finish2018_score = []

finish_score = driver.find_elements_by_class_name('leaderboard__player__item--total-par')

for score in finish_score:
    finish2018_score.append(score.text)

In [66]:
print(len(finish2018_score))

87


In [67]:
## Create a dataframe for the players finishing position

finish2018 = pd.DataFrame.from_dict({'Player_first':finish2018_players_first, 
                                    'Player_last':finish2018_players_last, 
                                    'Position':finish2018_position,
                                    'Score':finish2018_score})

In [68]:
finish2018.head(3)

Unnamed: 0,Player_first,Player_last,Position,Score
0,Patrick,Reed,1,-15
1,Rickie,Fowler,2,-14
2,Jordan,Spieth,3,-13


In [69]:
finish2018.tail()

Unnamed: 0,Player_first,Player_last,Position,Score
82,Yuxin,Lin,CUT,-
83,Mark,O'Meara,CUT,-
84,Sergio,Garcia,CUT,-
85,Matt,Parziale,CUT,-
86,Harry,Ellis,CUT,-


In [70]:
## Combine the names into one column

finish2018['Player'] = finish2018['Player_first'] + ' ' + finish2018['Player_last']

In [71]:
finish2018.head(2)

Unnamed: 0,Player_first,Player_last,Position,Score,Player
0,Patrick,Reed,1,-15,Patrick Reed
1,Rickie,Fowler,2,-14,Rickie Fowler


In [72]:
## Drop the first and last name columns

finish2018.drop(['Player_first', 'Player_last'], axis=1, inplace=True)

In [73]:
## Rearrange the order of the columns

finish2018 = finish2018[['Player', 'Position', 'Score']]

In [74]:
finish2018.tail()

Unnamed: 0,Player,Position,Score
82,Yuxin Lin,CUT,-
83,Mark O'Meara,CUT,-
84,Sergio Garcia,CUT,-
85,Matt Parziale,CUT,-
86,Harry Ellis,CUT,-


In [75]:
## Create a 'Cut' column, 1 if the player was cut, 0 if not.

finish2018['Cut'] = np.where(finish2018['Position'] == 'CUT', 1, 0)

In [80]:
## Extract the players that were cut and give them a score and position

cut_players = finish2018[finish2018['Cut'] == 1]

In [87]:
## Sandy Lyle should be the first player, he was the first to miss the cut on the website.

cut_players.head()

Unnamed: 0,Player,Position,Score,Cut
53,Sandy Lyle,CUT,-,1
54,Charl Schwartzel,CUT,-,1
55,Jose Maria Olazabal,CUT,-,1
56,Jason Dufner,CUT,-,1
57,Patrick Cantlay,CUT,-,1


In [83]:
## Get the score, from their total and subtract 144 (two rounds at 72 par)

## first xpath:  //*[@id="scoring"]/div[1]/div[1]/div[5]/div[55]/div/span[12]
## last xpath:   //*[@id="scoring"]/div[1]/div[1]/div[5]/div[88]/div/span[12]

cut_scores = []

for i in range(55, 89):
    path = '//*[@id="scoring"]/div[1]/div[1]/div[5]/div[' + str(i) + ']/div/span[12]'
    scores = driver.find_elements_by_xpath(path)
    for s in scores:
        cut_scores.append(s.text)

In [85]:
print(len(cut_scores))
print(cut_scores[:3])
print(cut_scores[-3:])

34
['150', '150', '150']
['159', '160', '166']


In [88]:
cut_players.shape

(34, 4)

In [95]:
## Convert the list of strings to ints

cut_scores = list(map(int, cut_scores))

In [98]:
## Subtract par of 144 (for two rounds) from the total scores

cut_scores = [x - 144 for x in cut_scores]

In [100]:
cut_scores[:5]

[6, 6, 6, 6, 7]

In [101]:
## Add the new list of scores to the existing 'Score' column in the cut_players df

cut_players['Score'] = cut_scores

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [102]:
## Rank the players based on their score
## The last player to make the cut was in 53, our cut_players need to start at 54

cut_players['Position'] = (cut_players['Score'].rank(method='min')) + 53

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [103]:
cut_players.head()

Unnamed: 0,Player,Position,Score,Cut
53,Sandy Lyle,54.0,6,1
54,Charl Schwartzel,54.0,6,1
55,Jose Maria Olazabal,54.0,6,1
56,Jason Dufner,54.0,6,1
57,Patrick Cantlay,58.0,7,1


In [104]:
## Concatenate this back to the original df of players positions
## Get the dataframe of just the players that made the cut

finish2018 = finish2018[finish2018['Position'] != 'CUT']

In [105]:
finish2018 = pd.concat([finish2018, cut_players])

In [106]:
finish2018.shape

(87, 4)

In [107]:
finish2018.tail()

Unnamed: 0,Player,Position,Score,Cut
82,Yuxin Lin,82,15,1
83,Mark O'Meara,82,15,1
84,Sergio Garcia,82,15,1
85,Matt Parziale,86,16,1
86,Harry Ellis,87,22,1


In [108]:
## Finished product of where all players that made the cut finished and their score

finish2018.to_csv('2018_Positions.csv')

## Concatenate all the dataframes together

In [109]:
## Make the 'Player' column the index for each df

fir2018.index = fir2018['Player']
gir2018.index = gir2018['Player']
dist2018.index = dist2018['Player']
putt2018.index = putt2018['Player']
finish2018.index = finish2018['Player']

In [110]:
## Concatenate just the stats into a dataframe

stats_df = pd.concat([fir2018, dist2018, gir2018, putt2018], axis=1)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  """Entry point for launching an IPython kernel.


In [111]:
stats_df.head()

Unnamed: 0,Player,FIR,FIR Rank,Player.1,dist,dist Rank,Player.2,GIR,GIR Rank,Player.3,Putting,Putting Rank
Adam Hadwin,Adam Hadwin,69.64,26.0,Adam Hadwin,291.38,38.0,Adam Hadwin,69.44,11.0,Adam Hadwin,1.67,49.0
Adam Scott,Adam Scott,69.64,26.0,Adam Scott,289.62,47.0,Adam Scott,68.06,15.0,Adam Scott,1.74,74.0
Alex Noren,Alex Noren,50.0,78.0,Alex Noren,285.25,60.0,Alex Noren,52.78,67.0,Alex Noren,1.61,22.0
Angel Cabrera,Angel Cabrera,67.86,31.0,Angel Cabrera,291.75,34.0,Angel Cabrera,44.44,77.0,Angel Cabrera,1.83,85.0
Austin Cook,Austin Cook,46.43,84.0,Austin Cook,280.75,74.0,Austin Cook,55.56,61.0,Austin Cook,1.75,75.0


In [112]:
## Drop the multiple 'Player' columns

stats_df.drop('Player', axis=1, inplace=True)

In [113]:
stats_df.head()

Unnamed: 0,FIR,FIR Rank,dist,dist Rank,GIR,GIR Rank,Putting,Putting Rank
Adam Hadwin,69.64,26.0,291.38,38.0,69.44,11.0,1.67,49.0
Adam Scott,69.64,26.0,289.62,47.0,68.06,15.0,1.74,74.0
Alex Noren,50.0,78.0,285.25,60.0,52.78,67.0,1.61,22.0
Angel Cabrera,67.86,31.0,291.75,34.0,44.44,77.0,1.83,85.0
Austin Cook,46.43,84.0,280.75,74.0,55.56,61.0,1.75,75.0


In [114]:
player_names = set(list(stats_df.index) + list(finish2018.index))

In [115]:
player_names

{'Adam Hadwin',
 'Adam Scott',
 'Alex Noren',
 'Alexander Noren',
 'Angel Cabrera',
 'Austin Cook',
 'Bernd Wiesberger',
 'Bernhard Langer',
 'Billy Horschel',
 'Branden Grace',
 'Brendan Steele',
 'Brian Harman',
 'Bryson DeChambeau',
 'Bubba Watson',
 'Cameron Smith',
 'Charl Schwartzel',
 'Charley Hoffman',
 'Chez Reavie',
 'Daniel Berger',
 'Danny Willett',
 'Doc Redman',
 'Doc Redman (A)',
 'Doug Ghim',
 'Doug Ghim (A)',
 'Dustin Johnson',
 'Dylan Frittelli',
 'Francesco Molinari',
 'Fred Couples',
 'Gary Woodland',
 'Hao-Tong Li',
 'Haotong Li',
 'Harry Ellis',
 'Harry Ellis (A)',
 'Henrik Stenson',
 'Hideki Matsuyama',
 'Ian Poulter',
 'Ian Woosnam',
 'Jason Day',
 'Jason Dufner',
 'Jhonattan Vegas',
 'Jimmy Walker',
 'Joaquin Niemann',
 'Joaquin Niemann (A)',
 'Jon Rahm',
 'Jordan Spieth',
 'Jose Maria Olazabal',
 'Justin Rose',
 'Justin Thomas',
 'Kevin Chappell',
 'Kevin Kisner',
 'Kiradech Aphibarnrat',
 'Kyle Stanley',
 'Larry Mize',
 'Louis Oosthuizen',
 'Marc Leishman',
 

In [129]:
## Replace the names in the stats_df index to match the final position df

new_names = {'Doc Redman (A)': 'Doc Redman',
             'Doug Ghim (A)': 'Doug Ghim',
             'Haotong Li': 'Hao-Tong Li',
             'Harry Ellis (A)': 'Harry Ellis',
             'Joaquin Niemann (A)': 'Joaquin Niemann',
             'Matt Parziale (A)': 'Matt Parziale',
             'Rafael Cabrera Bello': 'Rafael Cabrera-Bello',
             'Yuxin Lin (A)': 'Yuxin Lin',
             'Ted Potter, Jr.':'Ted Potter',
             'Alex Noren': 'Alexander Noren'}

stats_df = stats_df.rename(index=new_names)

In [130]:
## Add in the final position of each player at the tournament

masters_2018 = pd.concat([stats_df, finish2018], axis=1)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  This is separate from the ipykernel package so we can avoid doing imports until


In [131]:
masters_2018.shape

(87, 12)

In [133]:
masters_2018.drop('Player', axis=1, inplace=True)

In [137]:
## Remove the 'T' in the 'Position' where a player tied with someone else

masters_2018['Position'] = masters_2018['Position'].apply(lambda x:str(x).strip('T'))

In [138]:
masters_2018.head(10)

Unnamed: 0,FIR,FIR Rank,dist,dist Rank,GIR,GIR Rank,Putting,Putting Rank,Position,Score,Cut
Adam Hadwin,69.64,26.0,291.38,38.0,69.44,11.0,1.67,49.0,24.0,-1,0
Adam Scott,69.64,26.0,289.62,47.0,68.06,15.0,1.74,74.0,32.0,1,0
Alexander Noren,50.0,78.0,285.25,60.0,52.78,67.0,1.61,22.0,69.0,9,1
Angel Cabrera,67.86,31.0,291.75,34.0,44.44,77.0,1.83,85.0,82.0,15,1
Austin Cook,46.43,84.0,280.75,74.0,55.56,61.0,1.75,75.0,75.0,10,1
Bernd Wiesberger,58.93,60.0,294.88,17.0,72.22,4.0,1.71,68.0,24.0,-1,0
Bernhard Langer,85.71,1.0,269.5,85.0,68.06,15.0,1.69,62.0,38.0,3,0
Billy Horschel,64.29,45.0,293.25,29.0,50.0,73.0,1.78,79.0,78.0,11,1
Branden Grace,58.93,60.0,291.62,35.0,62.5,34.0,1.6,20.0,24.0,-1,0
Brendan Steele,57.14,64.0,294.25,21.0,63.89,31.0,1.81,82.0,58.0,7,1


In [139]:
## Rearrange to columns to my liking

masters_2018 = masters_2018[['Position', 'Score', 'FIR', 'FIR Rank', 'dist', 'dist Rank', 'GIR', 'GIR Rank', 'Putting',
       'Putting Rank', 'Cut']]

In [141]:
## Replace 'E' for even par, with 0 to be able to convert to an int

masters_2018['Score'] = masters_2018['Score'].replace({'E':0})

In [142]:
## Lastly, convert all the columns to numeric type to analyze later

cols = ['FIR', 'FIR Rank', 'dist', 'dist Rank', 'GIR', 'GIR Rank', 'Putting',
       'Putting Rank', 'Position', 'Score', 'Cut']

masters_2018[cols] = masters_2018[cols].apply(pd.to_numeric, axis=1)

In [146]:
masters_2018['Year'] = 2018

In [147]:
masters_2018.head(10)

Unnamed: 0,Position,Score,FIR,FIR Rank,dist,dist Rank,GIR,GIR Rank,Putting,Putting Rank,Cut,Year
Adam Hadwin,24.0,-1.0,69.64,26.0,291.38,38.0,69.44,11.0,1.67,49.0,0.0,2018
Adam Scott,32.0,1.0,69.64,26.0,289.62,47.0,68.06,15.0,1.74,74.0,0.0,2018
Alexander Noren,69.0,9.0,50.0,78.0,285.25,60.0,52.78,67.0,1.61,22.0,1.0,2018
Angel Cabrera,82.0,15.0,67.86,31.0,291.75,34.0,44.44,77.0,1.83,85.0,1.0,2018
Austin Cook,75.0,10.0,46.43,84.0,280.75,74.0,55.56,61.0,1.75,75.0,1.0,2018
Bernd Wiesberger,24.0,-1.0,58.93,60.0,294.88,17.0,72.22,4.0,1.71,68.0,0.0,2018
Bernhard Langer,38.0,3.0,85.71,1.0,269.5,85.0,68.06,15.0,1.69,62.0,0.0,2018
Billy Horschel,78.0,11.0,64.29,45.0,293.25,29.0,50.0,73.0,1.78,79.0,1.0,2018
Branden Grace,24.0,-1.0,58.93,60.0,291.62,35.0,62.5,34.0,1.6,20.0,0.0,2018
Brendan Steele,58.0,7.0,57.14,64.0,294.25,21.0,63.89,31.0,1.81,82.0,1.0,2018


In [148]:
## Save

masters_2018.to_csv('Masters_2018.csv')

In [145]:
driver.close()