In [1]:
# Import required libraries
import csv
import itertools
import os
import re
import string

import googlemaps
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

## Get Static Fighter Stats

In [2]:
def get_elevation(loc, api_key=None):
    """Return the elevation reported by Google Maps for a free-text location.

    The location is geocoded first and the elevation is then requested for
    the coordinates of the first geocoding hit.

    Args:
        loc: Address / place name passed to the geocoder.
        api_key: Google Maps API key. When omitted, the key is read from the
            ``GOOGLE_MAPS_API_KEY`` environment variable.

    Returns:
        The ``'elevation'`` value of the first elevation result, or ``None``
        when the geocoder or the elevation service returns no results.

    Raises:
        RuntimeError: If no API key was supplied and the environment variable
            is not set.
    """
    # SECURITY: the key used to be hardcoded here. A key that has been
    # committed to source control should be treated as leaked and revoked.
    api_key = api_key or os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise RuntimeError(
            "No Google Maps API key available: pass api_key= or set the "
            "GOOGLE_MAPS_API_KEY environment variable."
        )

    gmaps = googlemaps.Client(key=api_key)

    # Geocoding an address
    geocode_result = gmaps.geocode(loc)
    if not geocode_result:
        return None
    coords = geocode_result[0]['geometry']['location']

    # Elevation
    elev_res = gmaps.elevation((coords['lat'], coords['lng']))
    if not elev_res:
        # Avoid an IndexError when the service answers with an empty list.
        return None

    return elev_res[0]['elevation']

def isSameName(name1, name2):
    """Tell whether two names are equal once reduced to their ASCII letters.

    Every character outside ``a-z`` / ``A-Z`` (spaces, punctuation, digits,
    accented letters, ...) is removed from both names and the remainders are
    compared case-insensitively.
    """
    normalized = [
        re.sub(r'[^a-zA-Z]', '', raw_name).lower()
        for raw_name in (name1, name2)
    ]
    return normalized[0] == normalized[1]


In [7]:
def get_fighter_link(name, max_pages=50):
    """Look a fighter up in Sherdog's fight finder and return the profile link.

    Result pages are requested one after another. On each page every result
    row is compared (via ``isSameName``) against a set of accepted orderings
    of the searched name; both the listed fighter name and the
    "first name + nickname" / "nickname + last name" combinations are tried.

    Args:
        name: Fighter name to search for.
        max_pages: Upper bound on the number of result pages requested, so
            the search always terminates even if the site keeps answering
            with non-empty result tables.

    Returns:
        The ``href`` of the first matching result link, or ``None`` when no
        row matches, a page has no result rows, or a request/parse error
        occurs.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0'
    }

    # Accepted spellings: the name as given plus re-orderings of its parts.
    # Only two- and three-word names get re-orderings; for any other length
    # there is no first/last name to swap (previously this added the bogus
    # variation "none none").
    name_parts = name.split()
    name_variations = {name.lower()}
    if len(name_parts) == 2:
        first_name, last_name = name_parts
        name_variations.add(f"{last_name} {first_name}".lower())
    elif len(name_parts) == 3:
        # Every ordered pair and every ordered triple of the three parts.
        for size in (2, 3):
            for ordering in itertools.permutations(name_parts, size):
                name_variations.add(' '.join(ordering).lower())

    fighter_link_name = name.replace(' ', '+')

    for page_num in range(1, max_pages + 1):
        url = f'https://www.sherdog.com/stats/fightfinder?association=&weightclass=&SearchTxt={fighter_link_name}&page={page_num}'
        try:
            # A timeout keeps one stalled connection from hanging the scrape.
            source = requests.get(url, headers=headers, timeout=30)
            soup = BeautifulSoup(source.text, 'lxml')
            fighter_table = soup.find_all('table', class_='new_table fightfinder_result')

            # Index 0 is skipped as the header row. A page without a result
            # table raises IndexError here, which ends the search below.
            result_rows = fighter_table[0].find_all('tr')[1:]
            if not result_rows:
                return None

            for fighter in result_rows:
                cells = fighter.find_all('td')
                fighter_name_link = cells[1].find('a')
                fighter_name = fighter_name_link.text.strip()

                # nickname variations
                nickname = cells[2].text.lower().strip('"')
                sherdog_name_parts = fighter_name.split(' ')
                candidates = (
                    fighter_name,
                    f'{sherdog_name_parts[0]} {nickname}',
                    f'{nickname} {sherdog_name_parts[-1]}',
                )

                if any(isSameName(candidate, name_variation)
                       for name_variation in name_variations
                       for candidate in candidates):
                    return fighter_name_link['href']

        except (requests.RequestException, IndexError, AttributeError, KeyError, TypeError):
            # Network failure or an unexpected page layout: stop searching.
            # (A bare ``except`` would also swallow KeyboardInterrupt.)
            return None

    return None

def get_hometown_and_trainsoutof_4(name):
    """Fetch hometown, hometown elevation and association from Sherdog.

    The fighter's profile link is resolved with ``get_fighter_link``; the
    profile page is then downloaded and parsed.

    Args:
        name: Fighter name to look up.

    Returns:
        A ``(hometown, hometown_elevation, trains_out_of)`` tuple. All three
        are ``None`` when no profile link is found; individual entries are
        ``None`` when the corresponding element is missing from the page or
        the elevation lookup fails.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0'
    }
    fighter_link = get_fighter_link(name)

    if fighter_link is None:
        return None, None, None

    # Fetching additional details from the fighter's Sherdog profile
    fighter_url = 'https://www.sherdog.com' + fighter_link

    # A timeout keeps one stalled connection from hanging the whole scrape.
    source2 = requests.get(fighter_url, headers=headers, timeout=30)
    soup2 = BeautifulSoup(source2.text, 'lxml')

    # Extracting hometown (first <span class="locality">, if any)
    locality_element = soup2.find('span', class_='locality')
    hometown = locality_element.text.strip() if locality_element is not None else None

    # Extracting training location (first <a class="association">, if any)
    association_element = soup2.find('a', class_='association')
    trains_out_of = association_element.text.strip() if association_element is not None else None

    hometown_elevation = None
    if hometown:
        try:
            hometown_elevation = get_elevation(hometown)
        except Exception as exc:
            # Best effort: a failed elevation lookup must not abort the
            # scrape, but it should be visible instead of silently dropped.
            # ``Exception`` (not a bare except) lets Ctrl-C through.
            tqdm.write(f"Elevation lookup failed for {hometown!r}: {exc}")

    return hometown, hometown_elevation, trains_out_of

In [8]:
def log_missing_fighters(fighter_name, log_csv_path='missing_fighters.csv'):
    """Append a fighter name to a one-column CSV log.

    Args:
        fighter_name: Name to record.
        log_csv_path: Path of the CSV log; created when it does not exist.

    The ``Fighter Name`` header row is written only when the file is empty.
    """
    # Explicit UTF-8 so names with non-ASCII characters do not raise
    # UnicodeEncodeError on platforms whose default encoding is narrower.
    with open(log_csv_path, 'a', newline='', encoding='utf-8') as log_file:
        writer = csv.writer(log_file)
        # In append mode the position is at the end of the file, so a
        # position of 0 means the file is empty and still needs its header.
        if log_file.tell() == 0:
            writer.writerow(['Fighter Name'])
        # Write the fighter's name
        writer.writerow([fighter_name])

In [9]:
def scrape_fighter_stats(include_progress_bar=True):
    """Scrape per-fighter static data into ``STATIC_UFC_FIGTERS.csv``.

    For every letter a-z the ufcstats.com fighter index is downloaded, each
    linked fighter page is fetched, and one CSV row is written per fighter.
    Hometown, hometown elevation and association come from
    ``get_hometown_and_trainsoutof_4``; fighters for which neither hometown
    nor association is found are also passed to ``log_missing_fighters``.

    Args:
        include_progress_bar: When true, wrap each letter's fighter list in
            a tqdm progress bar.

    The output file is opened in ``'w'`` mode, so it is rewritten on each run.
    """
    # Column headers for the CSV
    fieldnames = ['Name', 'Height', 'Reach', 'STANCE', 'DOB', 'ID', 'Hometown', 'Hometown_Elevation', 'Trains_Out_Of']

    # Explicit UTF-8 so non-ASCII names/hometowns cannot fail to encode on
    # platforms with a narrower default encoding.
    with open('STATIC_UFC_FIGTERS.csv', 'w', newline='', encoding='utf-8') as csvfile:
        # extrasaction='ignore' drops scraped keys that are not in fieldnames.
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()

        # Loop through each alphabet letter
        for letter in string.ascii_lowercase:
            # Fetch the fighter list page for the current alphabet letter
            source = requests.get(
                f'http://www.ufcstats.com/statistics/fighters?char={letter}&page=all',
                timeout=30,
            ).text
            soup = BeautifulSoup(source, "lxml")

            # Get all fighter entries on the page
            fighter_entries = soup.find_all('tr', attrs={'class': 'b-statistics__table-row'})

            # Use tqdm for progress bar if include_progress_bar is True
            fighter_range = tqdm(fighter_entries, desc=f"Processing fighters starting with '{letter}'", leave=True) if include_progress_bar else fighter_entries

            # Loop through each fighter entry on the page
            for fighter in fighter_range:
                a_href = fighter.find('a')

                # Rows without a link carry no fighter page to scrape.
                if a_href is None:
                    continue

                fighter_url = a_href.get("href")

                # Fetch the fighter's individual stats page
                source2 = requests.get(fighter_url, timeout=30).text

                # The fighter ID is the last path segment of the URL
                ID = fighter_url.split('/')[-1].strip()

                soup2 = BeautifulSoup(source2, "lxml")

                # A page without the name element used to raise
                # AttributeError and abort the whole scrape; skip it instead.
                name_element = soup2.find('span', {'class': 'b-content__title-highlight'})
                if name_element is None:
                    continue
                name = name_element.text.strip()

                # Dictionary holding the fighter's stats
                fighter_stats = {'Name': name, 'ID': ID}

                # Every "label: value" list item becomes one entry. Only the
                # first colon separates label from value, so values that
                # themselves contain a colon are kept whole.
                for stat in soup2.find_all('li', attrs={'class': 'b-list__box-list-item'}):
                    label, separator, value = stat.text.partition(":")
                    if separator:
                        fighter_stats[label.strip()] = value.strip()

                # Extract and store the fighter's hometown and trains out of
                hometown, hometown_elevation, trains_out_of = get_hometown_and_trainsoutof_4(name)

                fighter_stats['Hometown'] = hometown
                fighter_stats['Hometown_Elevation'] = hometown_elevation
                fighter_stats['Trains_Out_Of'] = trains_out_of
                if (hometown is None) and (trains_out_of is None):
                    log_missing_fighters(name)

                writer.writerow(fighter_stats)

## Scrape Static Fighter Data

In [None]:
# Run the static-fighter scrape. scrape_fighter_stats opens
# STATIC_UFC_FIGTERS.csv in 'w' mode, so an existing file is overwritten.
scrape_fighter_stats()

### Main Scraping Functions

In [3]:
"""
RUN THIS BOX TO SEE THE CURRENT RESULTS AND WHAT THE DATA SHOULD LOOK LIKE.

Eventually, this collection of functions should be under a FightData object 
-> Then we can just call FightData.scrape_ufc_stats() as well as update the columns 
after every new fight -> make this a cron job that runs every sat night after the fights.

Feel free to optimize this if it looks a little chunky.

Data scraped should be in the following format:
[col1_title, col2_title, col3_title, ...]
[r1c1_data,  r1c2_data,  r1c3_data,  ...]
[r2c1_data,  r2c2_data,  r2c3_data,  ...]
[r3c1_data,  r3c2_data,  r3c3_data,  ...]
[r4c1_data,  r4c2_data,  r4c3_data,  ...]
...

The data will then be put into a CSV file.
"""

# Add to these columns with new data scraped
def _build_columns():
    """Assemble the CSV header: fight metadata, then per-fighter stat columns.

    Layout after the 17 metadata columns: the 12 "totals" stats for
    fighter_a (rounds 1-5, then total) and fighter_b (same order), followed
    by the 12 significant-strike stats in the same fighter/period order.
    """
    fight_columns = [
        'fight_night_title', 'date', 'location', 'location_elevation',
        'fighter_a', 'fighter_a_id', 'fighter_b', 'fighter_b_id',
        'winner', 'winner_id', 'division', 'outcome_method', 'outcome_round',
        'outcome_time', 'outcome_format', 'referee', 'outcome_detail',
    ]
    periods = [f'round_{number}' for number in range(1, 6)] + ['total']
    totals_stats = [
        'kd', 'sig_str_landed', 'sig_str_attempted', 'sig_str_pct',
        'total_str_landed', 'total_str_attempted', 'td_landed', 'td_attempted',
        'td_pct', 'sub_att', 'rev', 'ctrl',
    ]
    strike_stats = [
        'head_shots_landed', 'head_shots_attempted',
        'body_shots_landed', 'body_shots_attempted',
        'leg_shots_landed', 'leg_shots_attempted',
        'distance_shots_landed', 'distance_shots_attempted',
        'clinch_landed', 'clinch_attempted',
        'ground_landed', 'ground_attempted',
    ]

    names = list(fight_columns)
    for stat_group in (totals_stats, strike_stats):
        for fighter in ('fighter_a', 'fighter_b'):
            for period in periods:
                prefix = f'{fighter}_{period}'
                for stat in stat_group:
                    # NOTE(review): historically only fighter_a/round_1 spells
                    # the takedown-attempt column "td_attempted"; every other
                    # fighter/period uses plain "attempted". The header is
                    # kept exactly as it was because renaming would change
                    # the CSV schema -- confirm with consumers before
                    # normalising.
                    if stat == 'td_attempted' and prefix != 'fighter_a_round_1':
                        stat = 'attempted'
                    names.append(f'{prefix}_{stat}')
    return names


columns = _build_columns()

UFC_STATS_URL = 'http://www.ufcstats.com/statistics/events/completed?page=all'

def scrape_ufc_stats(include_progress_bar=True):
    """Scrape every completed event and append its fights to the men's CSV.

    The event index at ``UFC_STATS_URL`` is downloaded and its table rows are
    visited from the last row back to index 1 (row 0 is skipped). Each row
    that contains a link is handed to ``get_fights``; the resulting men's
    rows are written to ``ufc_men_stats_by_fight.csv``.

    Args:
        include_progress_bar: When true, show a tqdm progress bar over the
            event rows.

    The CSV is opened in append mode and the ``columns`` header is written
    only when the file is empty, so repeated runs append to existing data.
    Women's rows are collected by ``get_fights`` but are not written here.
    """
    # Explicit UTF-8 so non-ASCII text cannot fail to encode on platforms
    # with a narrower default encoding.
    with open('ufc_men_stats_by_fight.csv', 'a', newline='', encoding='utf-8') as men_csv_file:
        writer1 = csv.writer(men_csv_file)

        # In append mode position 0 means the file is empty: write the header.
        if men_csv_file.tell() == 0:
            writer1.writerow(columns)

        # Scrape the data (timeout: never hang forever on a stalled socket)
        source_main = requests.get(UFC_STATS_URL, timeout=30).text
        soup_main = BeautifulSoup(source_main, "lxml")
        fights_main = soup_main.find_all('tr', attrs={'class': 'b-statistics__table-row'})

        # Last row first, down to and including index 1; index 0 is skipped.
        event_rows = fights_main[:0:-1]
        if include_progress_bar:
            event_rows = tqdm(event_rows, desc="Scraping UFC Stats", leave=True)

        # Loop through each fight card
        for fight_night in event_rows:
            fight_night_ahref = fight_night.find('a')
            if fight_night_ahref is None:
                continue

            fight_night_title = fight_night_ahref.text.strip()
            fight_night_link = fight_night_ahref.get("href")

            # get_fights fills these lists in place, one row per fight.
            men_data = []
            women_data = []
            get_fights(fight_night_link, men_data, women_data, fight_night_title)

            # Write the newly scraped data to the CSV
            writer1.writerows(men_data)
            

# Gets the individual fights from the fight night
def get_fights(link, men_data, women_data, fight_night_title):
    """Scrape every fight of one event and append a row per fight.

    Args:
        link: URL of the event (fight night) page.
        men_data: List that receives the rows of fights not tagged as women's.
        women_data: List that receives the rows of women's fights.
        fight_night_title: Event title stored in the first column of each row.

    Fights are processed from the last table row to the first. A fight is
    classed as a women's fight when any ``<p>`` inside its table row contains
    the text "women" (case-insensitive). Both lists are mutated in place;
    nothing is returned.
    """
    # A timeout keeps one stalled connection from hanging the whole scrape.
    source_fight_night = requests.get(link, timeout=30).text
    soup_fight_night = BeautifulSoup(source_fight_night, "lxml")
    fight_night_fights = soup_fight_night.find_all('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})
    date, location, location_elevation = get_fight_date_and_location(soup_fight_night)

    for fight in reversed(fight_night_fights):
        fight_ahref = fight.find('a')
        if fight_ahref is None:
            continue

        fight_link = fight_ahref.get("href")
        # NOTE(review): get_fighters, get_fight_overview and get_fight_totals
        # each download fight_link again; sharing one parsed page would cut
        # the request count but needs a signature change in those helpers.
        fighters_data = get_fighters(fight_link)
        fight_overview_data = get_fight_overview(fight_link)
        # Index 2 of the overview is the round in which the fight ended.
        fight_totals = get_fight_totals(fight_link, fight_overview_data[2])
        fight_sig_strikes = get_fight_sig_strikes(fight_link, fight_overview_data[2])

        # One flat row in the same order as the CSV header.
        row = [fight_night_title, date, location, location_elevation]
        row.extend(fighters_data)
        row.extend(fight_overview_data)
        for stats_group in fight_totals:
            row.extend(stats_group)
        for stats_group in fight_sig_strikes:
            row.extend(stats_group)

        # Women's rows previously omitted location_elevation, which shifted
        # every later value one column to the left relative to the men's
        # rows; both kinds of row now share the same layout.
        women_fight = any("women" in p.text.strip().lower() for p in fight.find_all('p'))
        if women_fight:
            women_data.append(row)
        else:
            men_data.append(row)

# Gets the fighters' names and links
def get_fighters(link):
    """Return names and IDs of both fighters plus the winner of one fight.

    Args:
        link: URL of the fight-details page.

    Returns:
        ``(fighter_a_name, fighter_a_id, fighter_b_name, fighter_b_id,
        winner_name, winner_id)``. A fighter's ID is the last path segment of
        the profile link. ``winner_name`` is the winning fighter's name,
        ``'Draw'`` or ``'No Contest'``; ``winner_id`` is ``None`` for draws
        and no-contests. Both winner fields are ``None`` when the status
        markers match none of those cases, and a name/ID is ``None`` when the
        corresponding element is missing from the page.

    Raises:
        IndexError: If the page lists fewer than two fighters.
    """
    # A timeout keeps one stalled connection from hanging the whole scrape.
    source_fight = requests.get(link, timeout=30).text
    soup_fight = BeautifulSoup(source_fight, "lxml")
    fighters = soup_fight.find_all('div', class_='b-fight-details__person')

    def parse_person(person_element):
        """Extract (status, name, id) from one fighter's block."""
        status_element = person_element.select_one('.b-fight-details__person-status')
        status = status_element.get_text(strip=True) if status_element else None

        name_element = person_element.select_one('.b-fight-details__person-name a')
        if name_element is None:
            # Previously the ID lookup raised AttributeError here even though
            # the name lookup already tolerated a missing element.
            return status, None, None

        name = name_element.get_text(strip=True)
        href = name_element.get("href")
        person_id = href.split('/')[-1].strip() if href else None
        return status, name, person_id

    fighter_a_status, fighter_a_name, fighter_a_id = parse_person(fighters[0])
    fighter_b_status, fighter_b_name, fighter_b_id = parse_person(fighters[1])

    # Default for status combinations not covered below; without it the
    # return statement raised UnboundLocalError.
    winner_name = None
    winner_id = None
    if fighter_a_status == 'W':
        winner_name = fighter_a_name
        winner_id = fighter_a_id
    elif fighter_b_status == 'W':
        winner_name = fighter_b_name
        winner_id = fighter_b_id
    elif fighter_a_status == fighter_b_status == 'D':
        winner_name = 'Draw'
    elif fighter_a_status == fighter_b_status == 'NC':
        winner_name = 'No Contest'

    return fighter_a_name, fighter_a_id, fighter_b_name, fighter_b_id, winner_name, winner_id

# Gets the overview box information
def get_fight_overview(link):
    """Scrape the overview box of a fight-details page.

    Args:
        link: URL of the fight-details page.

    Returns:
        ``(division, outcome_method, outcome_round, outcome_time,
        outcome_format, referee, outcome_detail)``, all strings.

    Raises:
        AttributeError / IndexError: If an expected element is missing.
    """
    # A timeout keeps one stalled connection from hanging the whole scrape.
    source = requests.get(link, timeout=30).text
    soup = BeautifulSoup(source, "lxml")

    details = soup.find_all('i', attrs={'class' : 'b-fight-details__text-item'})

    # Division = fight title without its last space-separated word (empty
    # when the title is a single word). This replaces the accidental
    # ``rsplit((' ', 1)[0])`` expression, which evaluated to ``rsplit(' ')``,
    # with an equivalent that says what it does.
    fight_title = soup.find('i', attrs={'class' : 'b-fight-details__fight-title'}).text.strip()
    division = fight_title.rpartition(' ')[0]

    outcome_method = soup.find('i', attrs={'style' : 'font-style: normal'}).text.strip()

    # For these items only the last space-separated token is kept.
    outcome_round = details[0].text.strip().split(' ')[-1]
    outcome_time = details[1].text.strip().split(' ')[-1]
    outcome_format = details[2].text.strip().split(' ')[-1]

    # Referee: keep everything after the label so a multi-word name is not
    # reduced to its final word (the previous ``split(' ')[-1]`` kept only
    # the last token). Assumes the item text has the form "Label: value";
    # when there is no colon the whole text is used.
    label, separator, value = details[3].text.partition(':')
    referee = ' '.join((value if separator else label).split())

    # Detail text: last element with the details class, split on single
    # spaces, blank tokens removed, first token (the label) dropped.
    detail_text = soup.find_all(True, attrs={'class' : 'b-fight-details__text'})[-1].text.strip()
    detail_words = [word for word in detail_text.split(' ') if word.strip()]
    outcome_detail = ' '.join(detail_words[1:])

    return division, outcome_method, outcome_round, outcome_time, outcome_format, referee, outcome_detail

def get_fight_totals(link, round):
    """Collect the "totals" statistics of a fight, per round and overall.

    Args:
        link: URL of the fight-details page.
        round: Number of rounds to read (anything ``int()`` accepts); only
            the first five are used.

    Returns:
        A 12-tuple of stat tuples ordered as fighter A rounds 1-5, fighter A
        total, fighter B rounds 1-5, fighter B total. Rounds that were not
        read, and every entry when the page has fewer than two stats
        sections, are tuples of twelve ``None`` values.
    """
    page_html = requests.get(link).text
    page = BeautifulSoup(page_html, "lxml")
    sections = page.find_all('section', attrs={'class' : 'b-fight-details__section js-fight-section'})

    placeholder = (None,) * 12
    fighter_a_rounds = [placeholder] * 5
    fighter_b_rounds = [placeholder] * 5
    fighter_a_total = placeholder
    fighter_b_total = placeholder

    if len(sections) > 1:
        row_filter = {'class' : 'b-fight-details__table-row'}
        cell_filter = {'class' : 'b-fight-details__table-text'}

        # Overall totals: second row of the table in the second section.
        total_cells = sections[1].find('table').find_all('tr', attrs=row_filter)[1].find_all('p', attrs=cell_filter)

        # Per-round totals: rows 1..5 of the table in the third section.
        for round_index in range(min(int(round), 5)):
            round_cells = sections[2].find('table').find_all('tr', attrs=row_filter)[round_index + 1].find_all('p', attrs=cell_filter)
            parsed_round = get_total_fight_stats(round_cells)
            fighter_a_rounds[round_index] = parsed_round[0]
            fighter_b_rounds[round_index] = parsed_round[1]

        parsed_total = get_total_fight_stats(total_cells)
        fighter_a_total = parsed_total[0]
        fighter_b_total = parsed_total[1]

    return (*fighter_a_rounds, fighter_a_total, *fighter_b_rounds, fighter_b_total)

def get_total_fight_stats(totals):
    """Turn one row of "totals" cells into a stats tuple for each fighter.

    Args:
        totals: Indexable sequence of objects exposing ``.text``. Indices
            2-19 are read; even indices feed fighter A and odd indices feed
            fighter B.

    Returns:
        ``(fighter_a, fighter_b)``. Each is a 12-tuple of stripped strings in
        the order: kd, sig_str landed, sig_str attempted, sig_str per,
        total_str landed, total_str attempted, td landed, td attempted,
        td per, sub_att, rev, ctrl.

    Raises:
        IndexError: If ``totals`` has fewer than 20 entries, or a
            "landed of attempted" cell does not contain the text "of".
    """
    # Columns whose text looks like "<landed> of <attempted>" and therefore
    # contribute two values instead of one.
    paired_columns = {4, 5, 8, 9, 10, 11}

    collected = ([], [])  # collected[0] -> fighter A, collected[1] -> fighter B
    for column in range(2, 20):
        raw_text = totals[column].text
        bucket = collected[column % 2]
        if column in paired_columns:
            pieces = raw_text.split("of")
            bucket.append(pieces[0].strip())
            bucket.append(pieces[1].strip())
        else:
            bucket.append(raw_text.strip())

    return tuple(collected[0]), tuple(collected[1])

def get_fight_sig_strikes(link, round, timeout=30):
    """Scrape the significant-strike breakdown of one fight, per round and overall.

    Args:
        link: URL of the fight-details page to download.
        round: Number of rounds to read (anything ``int()`` accepts). Rounds
            beyond the fifth are ignored.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` waits indefinitely, so one stalled connection would
            hang the whole scrape.

    Returns:
        A 12-tuple: fighter A's round 1-5 stats and fight totals, followed by
        fighter B's round 1-5 stats and fight totals. Each entry is the tuple
        produced by ``get_fight_sig_stats``. Rounds that were not read are
        tuples of twelve ``None``; so is every entry when the page has fewer
        than two ``width: 745px`` tables.

    Raises:
        IndexError: If the page lacks the expected section or table rows.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    row_attrs = {'class': 'b-fight-details__table-row'}
    cell_attrs = {'class': 'b-fight-details__table-text'}
    max_rounds = 5

    source = requests.get(link, timeout=timeout).text
    soup = BeautifulSoup(source, "lxml")
    stats = soup.find_all('section', attrs={'class': 'b-fight-details__section js-fight-section'})

    empty = (None,) * 12
    player_a_rounds = [empty] * max_rounds
    player_b_rounds = [empty] * max_rounds
    player_a_total_stats = player_b_total_stats = empty

    # Look the summary tables up once; the second one holds the fight totals.
    summary_tables = soup.find_all('table', attrs={'style': 'width: 745px'})
    if len(summary_tables) > 1:
        total_stats = summary_tables[1].find_all('tr', attrs=row_attrs)[1].find_all('p', attrs=cell_attrs)

        rounds_to_read = min(int(round), max_rounds)
        if rounds_to_read > 0:
            # Row 0 of the per-round table is skipped; round N lives in row N.
            round_rows = stats[4].find('table').find_all('tr', attrs=row_attrs)
            for i in range(rounds_to_read):
                round_cells = round_rows[i + 1].find_all('p', attrs=cell_attrs)
                # Parse each row once and unpack both fighters from the result.
                player_a_rounds[i], player_b_rounds[i] = get_fight_sig_stats(round_cells)

        player_a_total_stats, player_b_total_stats = get_fight_sig_stats(total_stats)

    return (
        tuple(player_a_rounds) + (player_a_total_stats,)
        + tuple(player_b_rounds) + (player_b_total_stats,)
    )

def get_fight_sig_stats(totals):
    """Turn one row of significant-strike cells into a tuple for each fighter.

    Args:
        totals: Indexable sequence of objects exposing ``.text``. Indices
            6-17 are read, each expected to look like
            "<landed> of <attempted>"; even indices feed fighter A and odd
            indices feed fighter B.

    Returns:
        ``(fighter_a, fighter_b)``. Each is a 12-tuple of stripped strings
        holding landed/attempted pairs in the order: head, body, leg,
        distance, clinch, ground.

    Raises:
        IndexError: If ``totals`` has fewer than 18 entries, or a cell does
            not contain the text "of".
    """
    sides = ([], [])  # sides[0] -> fighter A, sides[1] -> fighter B
    for column in range(6, 18):
        landed_then_attempted = totals[column].text.split("of")
        target = sides[column % 2]
        target.append(landed_then_attempted[0].strip())
        target.append(landed_then_attempted[1].strip())

    fighter_a, fighter_b = (tuple(values) for values in sides)
    return fighter_a, fighter_b

def get_fight_date_and_location(soup):
    """Read the date and location from a parsed page and look up the elevation.

    Args:
        soup: Parsed page exposing ``find_all``. Its first two
            ``li.b-list__box-list-item`` elements are expected to contain
            "Date:" and "Location:" respectively.

    Returns:
        ``(date, location, location_elevation)``. The first two are stripped
        strings; the third is whatever ``get_elevation(location)`` returns,
        which is ``None`` when the location cannot be geocoded.

    Raises:
        IndexError: If fewer than two list items are found or a label is
            missing from its item.
    """
    def text_after(label, node):
        # Keep only what follows the label, without surrounding whitespace.
        return node.text.strip().split(label)[1].strip()

    info_items = soup.find_all('li', attrs={'class' : 'b-list__box-list-item'})
    date = text_after("Date:", info_items[0])
    location = text_after("Location:", info_items[1])
    return date, location, get_elevation(location)



<a class="b-link b-link_style_black" href="http://www.ufcstats.com/event-details/a6a9ab5a824e8f66">
                          UFC 2: No Way Out
                        </a>
<a class="b-link b-link_style_black" href="http://www.ufcstats.com/event-details/1a49e0670dfaca31">
                          UFC 3: The American Dream
                        </a>
<a class="b-link b-link_style_black" href="http://www.ufcstats.com/event-details/b60391da771deefe">
                          UFC 4: Revenge of the Warriors
                        </a>
<a class="b-link b-link_style_black" href="http://www.ufcstats.com/event-details/dedc3bb440d09554">
                          UFC 5: The Return of the Beast
                        </a>
<a class="b-link b-link_style_black" href="http://www.ufcstats.com/event-details/1c3f5e85b59ec710">
                          UFC 6: Clash of the Titans
                        </a>
<a class="b-link b-link_style_black" href="http://www.ufcstats.com/event-details/5af480a3b2e

KeyboardInterrupt: 

In [None]:
# Kick off the scrape.
# NOTE(review): scrape_ufc_stats is not defined in this part of the notebook;
# presumably an earlier cell defines it. Confirm this runs on a fresh kernel.
scrape_ufc_stats()