### Away goals influence analysis

In [132]:
import re

import numpy as np
import pandas as pd
import soccerdata as sd
from scipy import stats

Czy obecnie pada mniej goli niż wcześniej?

In [64]:
def extract_score(score):
    """Return the two goal counts found in a score string.

    The score is expected to hold two integers separated by an en dash
    ("–"), e.g. "2–1". Surrounding text such as "(4) 1–1 (3)" is ignored:
    only the digits directly left and right of the dash are used.
    Multi-digit counts ("10–0") are returned in full.

    Args:
        score: Score text containing "<digits>–<digits>".

    Returns:
        A two-element list of digit strings, first the count left of the
        dash, then the count right of it.

    Raises:
        ValueError: If no "<digits>–<digits>" pattern is present.
    """
    # Capture whole digit runs instead of the single characters next to the
    # dash, so double-digit scores are not truncated.
    match = re.search(r"(\d+)\s*–\s*(\d+)", score)
    if match is None:
        raise ValueError(f"no '<goals>–<goals>' score found in {score!r}")
    return [match.group(1), match.group(2)]

In [133]:
def _season_label(season):
    """Format a season code as "YY/YY" for display.

    Two-year codes (2122) are split as they are; a four-digit start year
    (2014) or a two-digit start year (14) is expanded to "14/15".
    """
    code = str(season)
    if code.isdigit():
        if len(code) == 4:
            first, last = int(code[:2]), int(code[2:])
            if last == (first + 1) % 100:
                # Consecutive halves, e.g. 2122 -> "21/22".
                # NOTE(review): a code like 2021 is ambiguous (year 2021 vs.
                # season 20/21); it is shown as "20/21" here - confirm this
                # matches how soccerdata resolves it.
                return f"{code[:2]}/{code[2:]}"
            # Plain start year, e.g. 2014 -> "14/15".
            return f"{code[2:]}/{(last + 1) % 100:02d}"
        if len(code) == 2:
            return f"{code}/{(int(code) + 1) % 100:02d}"
    # Unrecognised shape: fall back to a plain two-character split.
    return f"{code[:2]}/{code[2:]}"


def get_leg_goals_count(seasons, print_seasons=False):
    """Collect per-match goal totals for first and second legs.

    For every season the FBref schedule is downloaded, group-stage rows are
    dropped, and the total goals of each remaining match are computed from
    its 'score' text via extract_score. Matches are labelled by the first
    five characters of their 'notes' value ("Leg 1", "Leg 2", ...); rows
    without notes are labelled 'final'.

    Args:
        seasons: Iterable of season identifiers, each passed on its own to
            sd.FBref(seasons=...).
        print_seasons: When True, print the mean goals per label for every
            season.

    Returns:
        Tuple (leg_1_goals, leg_2_goals): two lists with the goal total of
        every 'Leg 1' / 'Leg 2' match, pooled over all seasons.
    """
    leg_1_goals = []
    leg_2_goals = []

    for season in seasons:
        # get knockout data
        ucl_season = sd.FBref(leagues="UEFA-Champions league", seasons=season)
        schedule = ucl_season.read_schedule()
        knockout = schedule[schedule['round'] != 'Group stage'].copy()
        # count goals
        knockout['goals'] = knockout['score'].apply(
            lambda x: sum(int(i) for i in extract_score(x))
        )
        # extract leg info
        knockout['leg'] = knockout['notes'].apply(
            lambda x: 'final' if pd.isna(x) else x[:5]
        )

        # Extend in place instead of rebuilding the lists on every season.
        leg_1_goals.extend(knockout.loc[knockout['leg'] == 'Leg 1', 'goals'].to_list())
        leg_2_goals.extend(knockout.loc[knockout['leg'] == 'Leg 2', 'goals'].to_list())

        if print_seasons:
            # The per-label means are only needed for this report.
            print(f"Season {_season_label(season)}")
            print(knockout.groupby('leg')['goals'].mean())

    return leg_1_goals, leg_2_goals

In [97]:
# Season identifiers passed to get_leg_goals_count (one FBref request per entry).
# NOTE(review): the two lists use different notations - two-year codes (2122)
# here, four-digit years (2014) below; presumably soccerdata accepts both, confirm.
no_rule_seasons = [2122, 2223]
# NOTE(review): 2019 is not in this list - confirm the omission is intentional.
rule_seasons = [2014, 2015, 2016, 2017, 2018, 2020]

In [124]:
# Pool the per-match goal totals of both legs over every season in rule_seasons.
leg_1_goals, leg_2_goals = get_leg_goals_count(rule_seasons)

In [125]:
# Mean goals per match for each leg over the rule_seasons sample, rounded to 2 places.
rule_leg_1_avg = round(np.mean(leg_1_goals), 2)
rule_leg_2_avg = round(np.mean(leg_2_goals), 2)

print("avg goals 14/15 to 20/21")
print(f"leg 1: {rule_leg_1_avg}")
print(f"leg 2: {rule_leg_2_avg}")

avg goals 14/15 to 20/21
leg 1: 2.82
leg 2: 3.08


In [126]:
# Same pooling as for rule_seasons, now over the seasons listed in no_rule_seasons.
leg_1_goals_no, leg_2_goals_no = get_leg_goals_count(no_rule_seasons)

In [127]:
# Mean goals per match for each leg over the no_rule_seasons sample, rounded to 2 places.
no_rule_leg_1_avg = round(np.mean(leg_1_goals_no), 2)
no_rule_leg_2_avg = round(np.mean(leg_2_goals_no), 2)

print("avg goals 21/22 to 22/23")
print(f"leg 1: {no_rule_leg_1_avg}")
print(f"leg 2: {no_rule_leg_2_avg}")

avg goals 21/22 to 22/23
leg 1: 2.43
leg 2: 2.89


In [131]:
def test_means(leg, leg_no, equal_var=True):
    t_statistic, p_value = stats.ttest_ind(leg, leg_no, equal_var=equal_var)

    print('leg 1:')
    if p_value < 0.05:
        print("Różnica w średniej liczbie goli jest istotna.")
    else:
        print("Nie ma istotnej różnicy w średniej liczbie goli.")

    print("p value:", p_value)

In [130]:
# Compare each leg between the two season groups; equal_var=False is passed on to the t-test.
# NOTE(review): the second call compares the leg-2 samples, yet its printed
# header still reads "leg 1:" (see the recorded output of this cell).
test_means(leg_1_goals, leg_1_goals_no, equal_var=False)
test_means(leg_2_goals, leg_2_goals_no, equal_var=False)

leg 1:
Nie ma istotnej różnicy w średniej liczbie goli.
p value: 0.28673583357096566
leg 1:
Nie ma istotnej różnicy w średniej liczbie goli.
p value: 0.693346491402913


Czy po usunięciu zasady o bramkach wyjazdowych zwiększyła się liczba dogrywek?

In [188]:
def get_number_of_et(seasons, print_matches=True):
    """Count knockout matches whose notes mention extra time.

    The FBref schedule for `seasons` is loaded in one request; rows from the
    'Group stage' and 'Final' rounds are discarded. A match counts when its
    'notes' text contains 'Extra' or 'extra'; rows without notes never count.

    Args:
        seasons: Season identifier(s) forwarded to sd.FBref(seasons=...).
        print_matches: When True, print the notes of the matching rows.

    Returns:
        Number of matching rows.
    """
    reader = sd.FBref(leagues="UEFA-Champions league", seasons=seasons)
    fixtures = reader.read_schedule()

    excluded_rounds = ['Group stage', 'Final']
    knockout = fixtures[~fixtures['round'].isin(excluded_rounds)].copy()

    def mentions_extra_time(note):
        # Missing notes cannot mention extra time.
        if pd.isna(note):
            return False
        return 'Extra' in note or 'extra' in note

    extra_time = knockout[knockout['notes'].apply(mentions_extra_time)]

    if print_matches:
        print(extra_time['notes'])
    return len(extra_time)

In [191]:
seasons = [14, 15, 16, 17, 18, 19, 20]
count = get_number_of_et(seasons, print_matches=False)
print(f"No. of matches with extra time: {count}")
print(f"ET per season: {count / len(seasons)}")

No. of matches with extra time: 8
ET per season: 1.1428571428571428


In [192]:
seasons = [21, 22]
count = get_number_of_et(seasons, print_matches=False)
print(f"No. of matches with extra time: {count}")
print(f"ET per season: {count / len(seasons)}")

No. of matches with extra time: 2
ET per season: 1.0
