In [None]:
import time, os
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# fbref page that is fetched first; the per-season links are read from it.
fbref_query = "https://fbref.com/en/comps/9/history/Premier-League-Seasons"

In [None]:
def getAndParseURL(url):
    """
    Fetch a URL and parse the response body as HTML.

    :param url: The URL to be fetched.
    :return: A BeautifulSoup object for the page when the server answers with
        status code 200; otherwise ``None``, after an error message has been
        printed.
    """
    # A browser-like User-Agent header is sent with the GET request.
    result = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})

    if result.status_code != 200:
        # status_code is an int: convert it before joining it to the rest of
        # the message, otherwise the error path itself raises a TypeError.
        print("Error: Unable to fetch URL. Status code: " + str(result.status_code)
              + ". Please check your internet connection or try again later.")
        return None

    # Successful response: hand the markup to the built-in HTML parser.
    return BeautifulSoup(result.text, "html.parser")

In [None]:
# Fetch the season index page and show the parsed document as the cell output
# (presumably a quick check that the request works).
getAndParseURL(fbref_query)

### From 1990 to 2023

In [None]:

# Collect season URLs from the fbref history page: every
# <td data-stat="league_name"> cell is expected to hold a link to one season,
# and the first `season_count` links (in page order) are kept.

# Number of season URLs to retrieve.
season_count = 14

# Get the HTML content of the history page and parse it.
html = getAndParseURL(fbref_query)

# getAndParseURL only returns a document for an HTTP 200 answer; without one
# there is nothing to search, so stop with a clear message.
if html is None:
    raise RuntimeError("Could not download the season index: " + fbref_query)

# List that receives the absolute URL of each season page.
seasons = []

for cell in html.find_all("td", {"data-stat": "league_name"}):
    link = cell.find("a")

    # Skip cells without a usable link instead of failing on None.
    if link is None or not link.get("href"):
        continue

    # hrefs on the page are site-relative, so prefix the host.
    seasons.append("https://fbref.com" + link.get("href"))

    # Stop as soon as enough seasons were collected.
    if len(seasons) >= season_count:
        break

# Display the list of season URLs as the cell output.
seasons

In [None]:
# Trial run on a single season page (the second entry of `seasons`): download
# it and read the table matching "Squad Standard Stats".
# The same browser-like User-Agent as in getAndParseURL is sent, so this
# request is not treated differently by the server.
data = requests.get(seasons[1], headers={"User-Agent": "Mozilla/5.0"})

# Fail here on an HTTP error rather than trying to parse an error page.
data.raise_for_status()

# read_html takes a path, URL or file-like object; wrap the downloaded markup
# in StringIO instead of passing the literal HTML string.
current_df = pd.read_html(StringIO(data.text), match="Squad Standard Stats")[0]

In [None]:
# Display the table read in the previous cell.
current_df

In [None]:
def getTables(matchTable, seasons, table, delay=5):
    """
    Download one table per season page and collect the results in ``table``.

    Each season URL is requested once. The season label is taken from the
    first whitespace-separated token of the ``<h1>`` inside ``div.comps``, the
    table whose text matches ``matchTable`` is read into a DataFrame, the label
    is stored in a new ``"season"`` column and the DataFrame is appended to
    ``table``.

    A season that cannot be downloaded or parsed is reported (URL and error)
    and skipped; the remaining seasons are still processed.

    :param matchTable: Text the wanted table must contain (passed to
        ``pd.read_html`` as ``match``).
    :param seasons: Iterable of season page URLs.
    :param table: List that receives one DataFrame per successful season; it
        is modified in place.
    :param delay: Seconds to wait after each season, successful or not, to
        avoid overloading the server. Defaults to 5.
    :return: None.
    """
    for season in seasons:
        try:
            # One request serves both the year lookup and the table read.
            response = requests.get(season, headers={"User-Agent": "Mozilla/5.0"})
            response.raise_for_status()
            markup = response.text

            # Extract the season label from the page heading.
            soup = BeautifulSoup(markup, "html.parser")
            year = soup.find("div", {"class": "comps"}).h1.text.strip().split()[0]

            # read_html takes a file-like object; literal HTML strings are deprecated.
            current_df = pd.read_html(StringIO(markup), match=matchTable)[0]

            # Tag every row with its season and hand the frame to the caller's list.
            current_df["season"] = year
            table.append(current_df)

        except Exception as exc:
            # Report which season failed and why. KeyboardInterrupt is not
            # caught, so the run can still be stopped by hand.
            print(season, "-", repr(exc))

        # Pause between seasons to prevent overloading the server.
        time.sleep(delay)

In [None]:

# Per-category accumulators: every name starts out as its own empty list that
# scraped DataFrames can be appended to.
standart_stats, regular_season, shooting, passing = [], [], [], []
goal_shot_creation, defensive_action, possession = [], [], []

# The same seven list objects gathered in one list, so they can be reached by
# position as well as by name.
table_list = [
    standart_stats,
    regular_season,
    shooting,
    passing,
    goal_shot_creation,
    defensive_action,
    possession,
]

# Match strings, one per category, in the same order as table_list.
matching_list = [
    "Squad Standard Stats",
    "Regular season",
    "Squad Shooting",
    "Squad Passing",
    "Squad Goal and Shot Creation",
    "Squad Defensive Actions",
    "Squad Possession",
]

In [None]:
# Scrape the "Squad Possession" table for every season; getTables appends one
# DataFrame per successfully read season to `possession`.
# NOTE: `possession` is not reset here, so running this cell again appends the
# same seasons a second time.
getTables(matchTable="Squad Possession", seasons=seasons, table=possession)

# Stack the per-season frames row-wise under a fresh 0..n-1 index.
possession_results = pd.concat(possession, ignore_index=True)

In [None]:
# Flatten the multi-level column header before writing the CSV.
# The "season" column added in getTables is stored as ("season", "") on a
# frame with multi-level columns, so a plain droplevel() would leave it with a
# blank name; fall back to the top-level label whenever the remaining label is
# empty.
# The isinstance check keeps the cell safe to run again once the columns are
# already flat.
if isinstance(possession_results.columns, pd.MultiIndex):
    remaining = possession_results.columns.droplevel()
    possession_results.columns = [
        full[0] if sub == "" else sub
        for full, sub in zip(possession_results.columns, remaining)
    ]

# Write the combined table (row index included) to possession.csv.
possession_results.to_csv("possession.csv", encoding='utf-8')

In [None]:
# TODO: automate this
#def get_write_data(tables,seasons,matchList): 

### --------------------

In [None]:
# Scratch check: fetch and parse a single season page (2021-2022).
soup = getAndParseURL('https://fbref.com/en/comps/9/2021-2022/2021-2022-Premier-League-Stats')

In [None]:
# First whitespace-separated token of the <h1> inside div.comps; this is the
# same expression getTables uses for the season label.
soup.find("div",{"class":"comps"}).h1.text.strip().split()[0]