# Image Collector from UFC website
### 1. Importing Libraries

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

### 2. Importing the dataset with all fighter names

In [2]:
# Read the processed fighter details; the first CSV column is used as the
# DataFrame index rather than as a regular data column.
fighters = pd.read_csv(
    filepath_or_buffer='../processed_data/all_fighter_details.csv',
    index_col=0,
)
# Leave the frame as the last expression so the notebook renders it.
fighters

### 3. Setting up the headers and the URL

In [3]:
# Example athlete page URL; the scraping loop further down rebuilds
# `ufc_url` for each fighter using the same pattern.
ufc_url = 'https://www.ufc.com/athlete/jose-aldo'

# HTTP headers sent with each page request: a desktop-browser User-Agent
# string and a referer pointing at the UFC home page.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/96.0.4664.110 Safari/537.36'
    ),
    'referer': 'https://www.ufc.com/',
}

### 4. Looping through the fighter names and getting the image URL by scraping the UFC website

In [6]:
# Scrape the `og:image` meta tag from each fighter's UFC athlete page and
# store it in `fighters['image_url']`.
#
# Every failure mode (missing name, network error, non-200 response, missing
# or empty meta tag) is reported and skipped, so a single bad row cannot abort
# a run of several thousand requests.

# Seconds to wait for the server before giving up on one request; without a
# timeout a single stalled connection would block the loop indefinitely.
REQUEST_TIMEOUT = 10

# Create the column up front so every row has a value (null when no image was
# found) and the null-count cell below works even if no request succeeds.
if 'image_url' not in fighters.columns:
    fighters['image_url'] = None

# A Session reuses the underlying connection across requests to the same host.
with requests.Session() as session:
    for index, fighter_name in fighters['FIGHTER'].items():
        # A missing name is read by pandas as NaN (a float), which has no
        # `.replace`; skip such rows instead of raising.
        if not isinstance(fighter_name, str) or not fighter_name.strip():
            print(f"Skipping row {index}: missing fighter name")
            continue

        # The page slug is the lower-cased name with spaces turned into hyphens.
        # NOTE(review): names containing punctuation or accents (e.g. "O'Malley",
        # "B.J.") presumably do not match the site's slugs and may account for
        # some of the missing images - confirm before changing the rule.
        format_name = fighter_name.replace(' ', '-').lower()
        ufc_url = f'https://www.ufc.com/athlete/{format_name}'

        try:
            response = session.get(ufc_url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as error:
            # Connection errors, timeouts, invalid URLs, etc.
            print(f"Failed to retrieve URL: {ufc_url} ({error})")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve URL: {ufc_url}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        image_tag = soup.find('meta', property='og:image')
        # `.get` avoids a KeyError when the tag exists without a `content` attribute.
        image_url = image_tag.get('content') if image_tag is not None else None
        if not image_url:
            print(f"Failed to retrieve image for: {ufc_url}")
            continue

        print(f"Image URL: {image_url}")
        fighters.loc[index, 'image_url'] = image_url

### 5. Final DataFrame with all the fighter details and image URLs

In [50]:
# Display the DataFrame after the scraping loop above, which writes an
# `image_url` value for each fighter whose image could be found.
fighters

In [51]:
# Number of fighters for which no image URL was found (null `image_url`).
fighters['image_url'].isna().sum()

In [52]:
# Total number of fighters (rows) in the DataFrame.
len(fighters)

In [53]:
# Persist the fighter details together with the scraped `image_url` column.
# The index is written as the first CSV column (the pandas default).
fighters.to_csv(
    path_or_buf='../processed_data/all_fighter_details_and_images.csv',
    index=True,
)

# Conclusion
Image URLs were successfully retrieved from the UFC website for 2000 fighters; the other 554 fighters do not have an image URL, because either their athlete page could not be retrieved or it contained no `og:image` tag. The fighter details together with the image URLs were saved in a new CSV file called `all_fighter_details_and_images.csv`.
Next, all the `image_url` values have to be added to the database.