In [None]:
# Import the necessary libraries
from pathlib import Path
from random import randint
from time import sleep

import pandas as pd
import requests as rq
from bs4 import BeautifulSoup as bs

In [None]:
# Accumulator for the player-profile links gathered by the scraping loop
players = list()

# Season ids to request: 2012 through 2021 inclusive (the range end is exclusive)
years = range(2012, 2021 + 1)

# Request headers: a desktop-browser User-Agent string sent with every GET
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0;Win64) AppleWebkit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
}

# Transfermarkt start pages of the 10 clubs whose squads are scraped
teams = [
    'https://www.transfermarkt.com/real-madrid/startseite/verein/418',
    'https://www.transfermarkt.com/fc-bayern-munchen/startseite/verein/27',
    'https://www.transfermarkt.com/fc-barcelona/startseite/verein/131',
    'https://www.transfermarkt.com/manchester-united/startseite/verein/985',
    'https://www.transfermarkt.com/juventus-turin/startseite/verein/506',
    'https://www.transfermarkt.com/fc-chelsea/startseite/verein/631',
    'https://www.transfermarkt.com/fc-liverpool/startseite/verein/31',
    'https://www.transfermarkt.com/manchester-city/startseite/verein/281',
    'https://www.transfermarkt.com/fc-paris-saint-germain/startseite/verein/583',
    'https://www.transfermarkt.com/atletico-madrid/startseite/verein/13',
]

In [None]:
# Retry policy for a single squad page: bounded so that a page which never
# yields a squad table cannot stall the whole run.
MAX_ATTEMPTS = 5
RETRY_DELAY_SECONDS = 10
REQUEST_TIMEOUT_SECONDS = 30

# Visit every team/season squad page and collect the player-profile links on it
for team in teams:
    for n in years:
        # The season is selected through the 'saison_id' query parameter
        team_url_season = str(team) + '?saison_id=' + str(n)

        for _ in range(MAX_ATTEMPTS):
            try:
                # A timeout keeps a stalled connection from hanging the cell forever
                response = rq.get(team_url_season, headers=headers,
                                  timeout=REQUEST_TIMEOUT_SECONDS)
                soup = bs(response.text, 'html.parser')
                # Raises IndexError when the page has no table with class "items"
                players_table = soup.find_all('table', class_='items')[0]
            except IndexError:
                print(f'Index Error : Sleeping for {RETRY_DELAY_SECONDS} seconds before retrying')
                sleep(RETRY_DELAY_SECONDS)
            except rq.RequestException as exc:
                # Connection errors and timeouts are retried like a missing table
                print(f'Request failed ({exc}) : Sleeping for {RETRY_DELAY_SECONDS} seconds before retrying')
                sleep(RETRY_DELAY_SECONDS)
            else:
                # href=True skips anchors without an href, which would otherwise
                # yield None and break the substring test below
                links = [a['href'] for a in players_table.find_all('a', href=True)]
                # Keep only links that point at player profile pages
                links = [href for href in links if '/profil/spieler/' in href]
                players.extend(links)
                # Progress marker: team slug (4th URL segment) and season id
                print(team.split('/')[3], n)
                break
        else:
            # Every attempt failed; skip this season rather than loop forever
            print(f'Giving up on {team_url_season} after {MAX_ATTEMPTS} attempts')

        # Pause a random 1-3 seconds between pages to avoid rate-limiting
        sleep(randint(1, 3))

In [None]:
# Wrap the collected links in a single-column mapping keyed by 'URL'
dict_players = {'URL': players}

# Build the frame, drop repeated links (keeping the first occurrence of each)
# and renumber the rows from 0
df = (
    pd.DataFrame(dict_players)
    .drop_duplicates(subset='URL')
    .reset_index(drop=True)
)

# Prepend the site host to every link; the collected hrefs are expected to be
# site-relative paths -- TODO confirm against the scraped values
df['URL'] = 'https://www.transfermarkt.com' + df['URL'].astype(str)

# Write the frame (index column included) to 'output/player_links.csv'.
# The directory is created first because to_csv fails when it is missing.
output_path = Path('output/player_links.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path)
