## Get Static Fight Stats

In [30]:
# Import required libraries
from bs4 import BeautifulSoup
import requests
import string
import csv

# Initialize the CSV file for writing
with open('ufc_fighters_static.csv', 'w', newline='') as csvfile:
    # Define the column headers for the CSV
    fieldnames = ['Name', 'Height', 'Reach', 'Stance', 'DOB', 'ID']
    
    # Initialize the CSV writer
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    
    # Write the header row to the CSV
    writer.writeheader()

    # Generate a list of lowercase alphabets to iterate through
    alphabets = list(string.ascii_lowercase)

    # Loop through each alphabet letter
    for letter in alphabets:
        # Fetch the fighter list page for the current alphabet letter
        source = requests.get(f'http://www.ufcstats.com/statistics/fighters?char={letter}&page=all').text
        soup = BeautifulSoup(source, "lxml")

        # Loop through each fighter entry on the page
        for fighter in soup.find_all('tr', attrs={'class': 'b-statistics__table-row'}):
            a_href = fighter.find('a')
            
            # Check if the fighter entry has a URL
            if a_href is not None:
                # Fetch the fighter's individual stats page
                source2 = requests.get(a_href.get("href")).text
                
                # Extract the fighter ID from the URL
                ID = a_href.get("href").split('/')[-1].strip()
                
                soup2 = BeautifulSoup(source2, "lxml")

                # Extract and store the fighter's name
                name = soup2.find('span', {'class': 'b-content__title-highlight'}).text.strip()

                # Initialize a dictionary to hold the fighter's stats
                fighter_stats = {'Name': name, 'ID': ID}

                # Extract and store other stats like Height, Reach, Stance, and DOB
                stats = soup2.find_all('li', attrs={'class': 'b-list__box-list-item'})
                for stat in stats:
                    cleaned_stat = [string.strip() for string in stat.text.split(":")]
                    if len(cleaned_stat) > 1:
                        key = cleaned_stat[0]
                        value = cleaned_stat[1]
                        fighter_stats[key] = value

                # Write the collected stats to the CSV file
                writer.writerow(fighter_stats)


**Main Scraping Functions**

In [138]:
from bs4 import BeautifulSoup
import requests

"""
RUN THIS BOX TO SEE THE CURRENT RESULTS AND WHAT THE DATA SHOULD LOOK LIKE.

Eventually, this collection of functions should be under a FightData object 
-> Then we can just call FightData.scrape_ufc_stats() as well as update the columns 
after every new fight -> make this a cron job that runs every sat night after the fights.

Feel free to optimize this if it looks a little chunky.

Data scraped should be in the following format:
[col1_title, col2_title, col3_title, ...]
[r1c1_data,  r1c2_data,  r1c3_data,  ...]
[r2c1_data,  r2c2_data,  r2c3_data,  ...]
[r3c1_data,  r3c2_data,  r3c3_data,  ...]
[r4c1_data,  r4c2_data,  r4c3_data,  ...]
...

The data will then be put into a CSV file.
"""

# Add to these columns with new data scraped
columns = ['Fight Night Title', 'Fighter A', 'Fighter A Link', 'Fighter B', 'Fighter B Link', 'Division', 
           'Outcome Method', 'Outcome Round', 'Outcome Time', 'Outcome Format', 
           'Referee', 'Outcome Detail']

UFC_STATS_URL = 'http://www.ufcstats.com/statistics/events/completed?page=all'

def scrape_ufc_stats():
    data = [columns]
    source_main = requests.get(UFC_STATS_URL).text
    soup_main = BeautifulSoup(source_main, "lxml")
    fights_main = soup_main.find_all('tr', attrs={'class': 'b-statistics__table-row'})

    for i in range(len(fights_main)-1, 0, -1):
        fight_night = fights_main[i]

        fight_night_ahref = fight_night.find('a')
        if fight_night_ahref is not None:
            fight_night_title = fight_night_ahref.text.strip()
            fight_night_link = fight_night_ahref.get("href")

            get_fights(fight_night_link, data, fight_night_title)
        break

def get_fights(link, data, fight_night_title):
    source_fight_night = requests.get(link).text
    soup_fight_night = BeautifulSoup(source_fight_night, "lxml")
    fight_night_fights = soup_fight_night.find_all('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    for j in range(len(fight_night_fights)-1, 0, -1):
        fight = fight_night_fights[j]
        fight_ahref = fight.find('a')

        if fight_ahref is not None:
            fight_link = fight_ahref.get("href")

            fighters_data = get_fighters(fight_link)
            fight_overview_data = get_fight_overview(fight_link)

            # Add more functions below that scrape certain sections of the ufcstats page
            # Totals (KD, Sig. Str, Sig. Str. %, Total Strikes, Takedowns, Takedown %, Sub. Att, Rev., Crtl.)
            # ^ Per round as well as we want the most possible data
            # Add Significant Strikes by Target (Head, Body, Leg) as well

            # fight_totals = get_fight_totals(fight_link) <-- Per round stuff as well!
            # fight_sig_strikes = get_fight_sig_strikes(fight_link) <-- Per round stuff as well!
            data.append([fight_night_title] + list(fighters_data) + list(fight_overview_data))
            # + list(fight_totals) + list(fight_sig_strikes)
            display(data)

        # Remove after development
        break

def get_fighters(link):
    source_fight = requests.get(link).text
    soup_fight = BeautifulSoup(source_fight, "lxml")
    fight_breakdown = soup_fight.find_all('tr', attrs={'class' : 'b-fight-details__table-row'})
    fighters = soup_fight.find_all('a', class_='b-link b-fight-details__person-link')

    fighter_a = fighters[0]
    fighter_a_name = fighter_a.text.strip()
    fighter_a_link = fighter_a.get("href")

    fighter_b = fighters[1]
    fighter_b_name = fighter_b.text.strip()
    fighter_b_link = fighter_b.get("href")

    return fighter_a_name, fighter_a_link, fighter_b_name, fighter_b_link

def get_fight_overview(link):
    source = requests.get(link).text
    soup = BeautifulSoup(source, "lxml")

    details = soup.find_all('i', attrs={'class' : 'b-fight-details__text-item'})

    division = ' '.join(soup.find('i', attrs={'class' : 'b-fight-details__fight-title'}).text.strip().rsplit((' ', 1)[0])[:-1])
    outcome_method = soup.find('i', attrs={'style' : 'font-style: normal'}).text.strip()
    outcome_round = details[0].text.strip().split(' ')[-1]
    outcome_time = details[1].text.strip().split(' ')[-1]
    outcome_format = details[2].text.strip().split(' ')[-1]
    referee = details[3].text.strip().split(' ')[-1]
    outcome_detail = ' '.join([word for word in soup.find_all(True, attrs={'class' : 'b-fight-details__text'})[-1].text.strip().split(' ') if word.strip()][1:])

    return division, outcome_method, outcome_round, outcome_time, outcome_format, referee, outcome_detail

def get_fight_totals(link):
    """TODO: Implement this function"""
    return None

def get_fight_sig_strikes(link):
    """TODO: Implement this function"""
    return None

scrape_ufc_stats()

[['Fight Night Title',
  'Fighter A',
  'Fighter A Link',
  'Fighter B',
  'Fighter B Link',
  'Division',
  'Outcome Method',
  'Outcome Round',
  'Outcome Time',
  'Outcome Format',
  'Referee',
  'Outcome Detail'],
 ['UFC 2: No Way Out',
  'Scott Morris',
  'http://www.ufcstats.com/fighter-details/be9d259be012e8a4',
  'Sean Daugherty',
  'http://www.ufcstats.com/fighter-details/a683f9ddb70aa4bd',
  'Open Weight',
  'Submission',
  '1',
  '0:20',
  'Limit',
  'McCarthy',
  'Guillotine Choke From Mount']]