### Away goals influence analysis

In [1]:
import soccerdata as sd
import numpy as np
from scipy import stats
import pandas as pd

Czy obecnie pada mniej goli niż wcześniej?

In [4]:
def extract_score(score):
    try:
        index = score.index("–")
        goals_list = [score[index-1], score[index+1]]
    except Exception:
        print('score is NONE!')
        goals_list = [0, 0]
    return goals_list

In [5]:
def get_leg_goals_count(seasons, print_seasons=False, league="UEFA-Champions league"):
    
    leg_1_goals = []
    leg_2_goals = []

    for season in seasons:
        # get knockout data
        ucl_season = sd.FBref(leagues=league, seasons=season)
        schedule = ucl_season.read_schedule()
        knockout = schedule[schedule['round'] != 'Group stage'].copy()
        # count goals
        knockout['goals'] = knockout['score'].apply(lambda x: sum([int(i) for i in extract_score(x)]))
        # extract leg info
        knockout['leg'] = knockout['notes'].apply(lambda x: 'final' if pd.isna(x) else x[:5])
        # calculate mean goals
        result = knockout.groupby('leg')['goals'].mean()

        leg_1_goals = leg_1_goals + knockout[knockout['leg'] == 'Leg 1']['goals'].to_list()
        leg_2_goals = leg_2_goals + knockout[knockout['leg'] == 'Leg 2']['goals'].to_list()
        
        if print_seasons:
            print(f"Season {str(season)[:2]}/{str(season)[2:]}")
            print(result)
    
    return leg_1_goals, leg_2_goals

In [6]:
no_rule_seasons = [2122, 2223, 2324]
rule_seasons = [2014, 2015, 2016, 2017, 2018, 2020]

In [10]:
leg_1_goals, leg_2_goals = get_leg_goals_count(rule_seasons)

In [11]:
print("avg goals 14/15 to 20/21")
print(f"leg 1: {round(np.mean(leg_1_goals), 2)}")
print(f"leg 2: {round(np.mean(leg_2_goals), 2)}")

avg goals 14/15 to 20/21
leg 1: 2.82
leg 2: 3.08


In [12]:
leg_1_goals_no, leg_2_goals_no = get_leg_goals_count(no_rule_seasons)

In [13]:
print("avg goals 21/22 to 22/23")
print(f"leg 1: {round(np.mean(leg_1_goals_no), 2)}")
print(f"leg 2: {round(np.mean(leg_2_goals_no), 2)}")

avg goals 21/22 to 22/23
leg 1: 2.5
leg 2: 2.88


In [131]:
def test_means(leg, leg_no, equal_var=True):
    t_statistic, p_value = stats.ttest_ind(leg, leg_no, equal_var=equal_var)

    print('leg 1:')
    if p_value < 0.05:
        print("Różnica w średniej liczbie goli jest istotna.")
    else:
        print("Nie ma istotnej różnicy w średniej liczbie goli.")

    print("p value:", p_value)

In [130]:
test_means(leg_1_goals, leg_1_goals_no, equal_var=False)
test_means(leg_2_goals, leg_2_goals_no, equal_var=False)

leg 1:
Nie ma istotnej różnicy w średniej liczbie goli.
p value: 0.28673583357096566
leg 1:
Nie ma istotnej różnicy w średniej liczbie goli.
p value: 0.693346491402913


Czy po usunięciu zasady o bramkach wyjazdowych zwiększyła się liczba dogrywek?

In [13]:
def get_number_of_et(seasons, print_matches=True, league="UEFA-Champions league"):
    ucl_season = sd.FBref(leagues=league, seasons=seasons)
    schedule = ucl_season.read_schedule()
    knockout = schedule[~schedule['round'].isin(['Group stage', 'Final'])].copy()
    # knockout.reset_index(drop=True, inplace=True)
    extra_time = knockout[knockout['notes'].apply(lambda x: False if pd.isna(x) else (x.find('Extra') != -1 or x.find('extra') != -1))]
    if print_matches:
        print(extra_time['notes'])
    return len(extra_time)

In [18]:
seasons = [14, 15, 16, 17, 18, 20]
count = get_number_of_et(seasons, print_matches=False)
print(f"No. of matches with extra time: {count}")
print(f"ET per season: {round(count / len(seasons), 2)}")

No. of matches with extra time: 7
ET per season: 1.17


In [19]:
seasons = [21, 22, 23]
count = get_number_of_et(seasons, print_matches=False)
print(f"No. of matches with extra time: {count}")
print(f"ET per season: {round(count / len(seasons), 2)}")

No. of matches with extra time: 5
ET per season: 1.67


## EUROPA LEAGUE

In [7]:
leg_1_goals, leg_2_goals = get_leg_goals_count(rule_seasons, league='UEFA-Europa League')

In [8]:
print("avg goals 14/15 to 20/21")
print(f"leg 1: {round(np.mean(leg_1_goals), 2)}")
print(f"leg 2: {round(np.mean(leg_2_goals), 2)}")
print(f"summed: {round(np.mean(leg_1_goals + leg_2_goals), 2)}")

avg goals 14/15 to 20/21
leg 1: 2.74
leg 2: 2.83
summed: 2.79


In [11]:
leg_1_goals_no, leg_2_goals_no = get_leg_goals_count(no_rule_seasons, league='UEFA-Europa League')

score is NONE!
score is NONE!


In [12]:
print("avg goals 21/22 to 23/24")
print(f"leg 1: {round(np.mean(leg_1_goals_no), 2)}")
print(f"leg 2: {round(np.mean(leg_2_goals_no), 2)}")
print(f"summed: {round(np.mean(leg_1_goals_no + leg_2_goals_no), 2)}")

avg goals 21/22 to 23/24
leg 1: 2.74
leg 2: 2.92
summed: 2.83


In [16]:
seasons = [14, 15, 16, 17, 18, 20]
count = get_number_of_et(seasons, print_matches=False, league="UEFA-Europa League")
print(f"No. of matches with extra time: {count}")
print(f"ET per season: {round(count / len(seasons), 2)}")

No. of matches with extra time: 13
ET per season: 2.17


In [17]:
seasons = [21, 22, 23]
count = get_number_of_et(seasons, print_matches=False, league="UEFA-Europa League")
print(f"No. of matches with extra time: {count}")
print(f"ET per season: {round(count / len(seasons), 2)}")

No. of matches with extra time: 13
ET per season: 4.33


In [23]:
"""
UCL
goals/match 2.95 vs 2.69
extra times/sezon 1.17 vs 1.67

UEL
g/m 2.79 vs 2.83
et/s 2.17 vs 4.33
"""
pass