## SCRAPING Event Details from UFCSTATS

In [59]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [3]:
# Completed-events listing; the page=all query asks for every event on one page.
URL = 'http://www.ufcstats.com/statistics/events/completed?page=all'
# requests has no default timeout, so a stalled connection would block the
# kernel indefinitely without one.
page = requests.get(URL, timeout=30)
# Stop here on a 4xx/5xx response instead of parsing an error page further down.
page.raise_for_status()

In [4]:
# Parse the downloaded page body with the 'html.parser' backend.
soup = BeautifulSoup(markup=page.content, features='html.parser')

In [46]:
# One find_all per field, each matching on the element's class attribute:
# event-name links, date spans, and location cells, in that order.
fight_name, fight_date, fight_location = (
    soup.find_all(class_=css_class)
    for css_class in (
        "b-link b-link_style_black",
        "b-statistics__date",
        "b-statistics__table-col b-statistics__table-col_style_big-top-padding",
    )
)

In [47]:
# Event names: whitespace-stripped text of each matched name element.
event = [tag.text.strip() for tag in fight_name]

In [48]:
# Event dates: whitespace-stripped text of each matched date element.
event_date = [tag.text.strip() for tag in fight_date]

In [49]:
# Event locations: whitespace-stripped text of each matched location cell.
location = [tag.text.strip() for tag in fight_location]

In [54]:
# Sanity check: the three lists do not all have the same length yet.
print(*(len(values) for values in (event, event_date, location)))

533 534 534


In [None]:
"""Both event_date and location have one extra value: the date and location of the next upcoming event.
    Remove the first value from both event_date and location.
"""

In [57]:
# Drop the leading entries that belong to the upcoming event, i.e. whatever
# surplus each list has over `event`. Measuring the surplus (instead of always
# slicing [1:]) makes this cell safe to re-run: once the lengths match, a second
# execution removes nothing rather than silently discarding a real event.
surplus_dates = len(event_date) - len(event)
if surplus_dates > 0:
    event_date = event_date[surplus_dates:]

surplus_locations = len(location) - len(event)
if surplus_locations > 0:
    location = location[surplus_locations:]

In [58]:
# Sanity check: all three lists should now report the same length.
print(*(len(values) for values in (event, event_date, location)))

533 533 533


In [67]:
# Pair the lists up position by position into (name, date, location) tuples.
eventdetails_zipped = [*zip(event, event_date, location)]

In [72]:
# Peek at the first (name, date, location) tuple to confirm the pairing.
eventdetails_zipped[0]

('UFC Fight Night: Covington vs. Woodley',
 'September 19, 2020',
 'Las Vegas, Nevada, USA')

In [110]:
# Build the frame from the zipped tuples, one column per tuple position.
column_names = ['Event_Name', 'Date', 'Location']
event_details = pd.DataFrame(eventdetails_zipped, columns=column_names)

In [111]:
# Convert the Date column from text to pandas datetimes.
event_details['Date'] = event_details['Date'].pipe(pd.to_datetime)

In [112]:
# Turn each location string into a list of its comma-separated parts.
# The separator pattern also consumes the whitespace around each comma, so the
# parts come out as 'Nevada' / 'USA' rather than ' Nevada' / ' USA' (splitting on
# a bare ',' kept that leading space). A multi-character pattern is treated as a
# regular expression by Series.str.split.
event_details['Location'] = event_details['Location'].str.split(r'\s*,\s*')
event_details['Location'].head(3)

0    [Las Vegas,  Nevada,  USA]
1    [Las Vegas,  Nevada,  USA]
2    [Las Vegas,  Nevada,  USA]
Name: Location, dtype: object

In [113]:
# From the list of location parts: first is city, last is country, and the
# second is the state only when there are at least three parts. For a two-part
# location ("City, Country") index 1 is the country, so State is left empty
# instead of duplicating Country. Each part is stripped so values never carry
# the space that followed the comma in the source text.
event_details['City'] = event_details['Location'].map(lambda parts: parts[0].strip())
event_details['State'] = event_details['Location'].map(
    lambda parts: parts[1].strip() if len(parts) > 2 else None
)
event_details['Country'] = event_details['Location'].map(lambda parts: parts[-1].strip())

In [114]:
# Drop Location now that City, State and Country hold its contents.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps the
# cell re-runnable: a second execution finds no Location column and is a no-op
# instead of raising KeyError.
event_details = event_details.drop(columns='Location', errors='ignore')

In [115]:
# Preview the first rows of the cleaned frame.
event_details.head()

Unnamed: 0,Event_Name,Date,City,State,Country
0,UFC Fight Night: Covington vs. Woodley,2020-09-19,Las Vegas,Nevada,USA
1,UFC Fight Night: Waterson vs. Hill,2020-09-12,Las Vegas,Nevada,USA
2,UFC Fight Night: Overeem vs. Sakai,2020-09-05,Las Vegas,Nevada,USA
3,UFC Fight Night: Smith vs. Rakic,2020-08-29,Las Vegas,Nevada,USA
4,UFC Fight Night: Munhoz vs. Edgar,2020-08-22,Las Vegas,Nevada,USA


In [118]:
import os

In [127]:
# Move into the project folder. The path is relative, so an unconditional chdir
# fails on a second run (the kernel is already inside UFC_Project); only change
# directory when we are not there yet.
if os.path.basename(os.getcwd()) != 'UFC_Project':
    os.chdir('UFC_Project/')

In [None]:
"""save as csv"""
# NOTE(review): the export below is commented out, so running this cell writes
# nothing. Uncommenting it would write event_details.csv (without the index
# column) relative to the current working directory.
#event_details.to_csv('event_details.csv',index= False)