### Web Scrping Project for Football Data

Read a .csv from a URL with Pandas

Target Website: https://www.football-data.co.uk/data.php

In [1]:
import pandas as pd 

# reading 1 csv file from the website

df_premier25 = pd.read_csv('https://www.football-data.co.uk/mmz4281/2526/E0.csv')

In [2]:
df_premier25.head(5)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,BFECAHH,BFECAHA
0,E0,15/08/2025,20:00,Liverpool,Bournemouth,4,2,H,1,0,...,2.03,1.78,2.07,1.85,2.03,1.88,1.94,1.76,2.14,1.86
1,E0,16/08/2025,12:30,Aston Villa,Newcastle,0,0,D,0,0,...,2.05,1.8,2.02,1.89,2.06,1.8,1.95,1.74,2.14,1.86
2,E0,16/08/2025,15:00,Brighton,Fulham,1,1,D,0,0,...,1.83,2.03,1.93,2.0,1.84,2.03,1.8,1.96,1.91,2.08
3,E0,16/08/2025,15:00,Sunderland,West Ham,3,0,H,0,0,...,1.95,1.9,1.97,1.95,1.95,1.94,1.86,1.78,2.02,1.97
4,E0,16/08/2025,15:00,Tottenham,Burnley,3,0,H,1,0,...,1.98,1.88,1.99,1.93,1.98,1.91,1.88,1.83,2.07,1.92


In [3]:

# rename columns
df_premier25 = df_premier25.rename(columns={'Date':'date',
                                            'HomeTeam':'home_team',
                                            'AwayTeam':'away_team',
                                            'FTHG': 'home_goals',
                                            'FTAG': 'away_goals'})

In [4]:
# show dataframe
df_premier25.head(5)

Unnamed: 0,Div,date,Time,home_team,away_team,home_goals,away_goals,FTR,HTHG,HTAG,...,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,BFECAHH,BFECAHA
0,E0,15/08/2025,20:00,Liverpool,Bournemouth,4,2,H,1,0,...,2.03,1.78,2.07,1.85,2.03,1.88,1.94,1.76,2.14,1.86
1,E0,16/08/2025,12:30,Aston Villa,Newcastle,0,0,D,0,0,...,2.05,1.8,2.02,1.89,2.06,1.8,1.95,1.74,2.14,1.86
2,E0,16/08/2025,15:00,Brighton,Fulham,1,1,D,0,0,...,1.83,2.03,1.93,2.0,1.84,2.03,1.8,1.96,1.91,2.08
3,E0,16/08/2025,15:00,Sunderland,West Ham,3,0,H,0,0,...,1.95,1.9,1.97,1.95,1.95,1.94,1.86,1.78,2.02,1.97
4,E0,16/08/2025,15:00,Tottenham,Burnley,3,0,H,1,0,...,1.98,1.88,1.99,1.93,1.98,1.91,1.88,1.83,2.07,1.92



### Read .csv from Multiple URLs with Pandas

https://www.football-data.co.uk/mmz4281/2526/E0.csv

Link: root + season + league

In [5]:
# link structure
"https://www.football-data.co.uk/mmz4281/" + "2526" + "/" + "E0" + ".csv"

'https://www.football-data.co.uk/mmz4281/2526/E0.csv'

In [6]:
# create a root variable
root = "https://www.football-data.co.uk/mmz4281/"

### Multiple leagues

- https://www.football-data.co.uk/mmz4281/2526/E0.csv
- https://www.football-data.co.uk/mmz4281/2526/E1.csv
- https://www.football-data.co.uk/mmz4281/2526/E2.csv
- https://www.football-data.co.uk/mmz4281/2526/E3.csv
- https://www.football-data.co.uk/mmz4281/2526/EC.csv

In [7]:
# creating list of leagues
leagues = ['E0','E2', 'E3']

frames = []
# looping through leagues, read multiple csv and append it into a list
for league in leagues:
    df = pd.read_csv(root + "2526" + "/"  + league + ".csv")
    frames.append(df)

In [8]:
# length of frames
len(frames)

3

In [9]:
# show 1st, 2nd and 3rd element
frames[2].head(3)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,BFECAHH,BFECAHA
0,E3,02/08/2025,15:00,Accrington,Gillingham,1,1,D,0,0,...,1.98,1.88,1.76,2.12,1.98,1.88,1.92,1.83,2.09,1.88
1,E3,02/08/2025,15:00,Barnet,Fleetwood Town,0,2,A,0,1,...,2.0,1.85,2.29,1.65,2.08,1.85,2.0,1.74,2.04,1.93
2,E3,02/08/2025,15:00,Bristol Rvs,Harrogate,0,1,A,0,0,...,1.83,2.03,1.85,2.0,1.85,2.03,1.78,1.96,1.88,2.12


### Multiple Seasons

In [10]:
for league in leagues:
    for season in range(15, 21):
        url = f"{root}{season}{season+1}/{league}.csv"
        print(f"Reading: {url}")
        try:
            df = pd.read_csv(url)
            df.insert(1, 'season', season)
            frames.append(df)
        except pd.errors.EmptyDataError:
            print(f"Empty file: {url}")
        except Exception as e:
            print(f"Failed to read {url}: {e}")

Reading: https://www.football-data.co.uk/mmz4281/1516/E0.csv
Reading: https://www.football-data.co.uk/mmz4281/1617/E0.csv
Reading: https://www.football-data.co.uk/mmz4281/1718/E0.csv
Reading: https://www.football-data.co.uk/mmz4281/1819/E0.csv
Reading: https://www.football-data.co.uk/mmz4281/1920/E0.csv
Reading: https://www.football-data.co.uk/mmz4281/2021/E0.csv
Reading: https://www.football-data.co.uk/mmz4281/1516/E2.csv
Reading: https://www.football-data.co.uk/mmz4281/1617/E2.csv
Reading: https://www.football-data.co.uk/mmz4281/1718/E2.csv
Reading: https://www.football-data.co.uk/mmz4281/1819/E2.csv
Reading: https://www.football-data.co.uk/mmz4281/1920/E2.csv
Reading: https://www.football-data.co.uk/mmz4281/2021/E2.csv
Reading: https://www.football-data.co.uk/mmz4281/1516/E3.csv
Reading: https://www.football-data.co.uk/mmz4281/1617/E3.csv
Reading: https://www.football-data.co.uk/mmz4281/1718/E3.csv
Reading: https://www.football-data.co.uk/mmz4281/1819/E3.csv
Reading: https://www.foo

In [11]:

# total frames (6 seasons x 3 leagues)
len(frames)

21

### Organize All The Data in a Dictionary

In [12]:
# create a dictionary with original name of league as key
dict_countries = {
              'Spanish La Liga':'SP1', 'Spanish Segunda Division':'SP2',
              'German Bundesliga':'D1', 
              'English Premier League':'E0', 'English League 1':'E2', 'English League 2':'E3',
             }

In [13]:

# get a dictionary element
dict_countries['German Bundesliga']

'D1'

In [14]:
dict_historical_data = {} 

# looping through key elements
for league in dict_countries:
    frames = [] # one dataframe per league
    for season in range(15, 21):
        df = pd.read_csv(root + str(season) + str(season+1) + "/" + dict_countries[league]+ ".csv")
        df.insert(1, 'season', season)
        frames.append(df)
    df_frames = pd.concat(frames) # storing all seasons of a league in a single dataframe
    dict_historical_data[league] = df_frames # assigning dataframe to a new dictionary

In [15]:
# show dataframe inside dict_historical_data
dict_historical_data['Spanish La Liga']

Unnamed: 0,Div,season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
0,SP1,15,21/08/15,Malaga,Sevilla,0,0,D,0,0,...,,,,,,,,,,
1,SP1,15,22/08/15,Ath Madrid,Las Palmas,1,0,H,1,0,...,,,,,,,,,,
2,SP1,15,22/08/15,Espanol,Getafe,1,0,H,1,0,...,,,,,,,,,,
3,SP1,15,22/08/15,La Coruna,Sociedad,0,0,D,0,0,...,,,,,,,,,,
4,SP1,15,22/08/15,Vallecano,Valencia,0,0,D,0,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,SP1,20,22/05/2021,Osasuna,Sociedad,0,1,A,0,0,...,2.34,1.00,2.07,1.86,2.06,1.87,2.07,1.96,1.98,1.88
376,SP1,20,22/05/2021,Real Madrid,Villarreal,2,1,H,0,1,...,2.57,-1.00,2.01,1.92,2.00,1.93,2.02,2.02,1.95,1.92
377,SP1,20,22/05/2021,Valladolid,Ath Madrid,1,2,A,1,0,...,2.30,1.50,1.90,2.03,1.90,2.02,1.95,2.06,1.90,1.97
378,SP1,20,23/05/2021,Granada,Getafe,0,0,D,0,0,...,1.80,0.25,1.73,2.08,1.77,2.18,1.84,2.23,1.77,2.12
