In [166]:
import pandas as pd
import pickle
from scipy.stats import poisson

table = pickle.load(open('dict_table', 'rb'))
dataset = pd.read_csv("clean_fifa_worldcup_matches.csv")
fixture =  pd.read_csv("fixtures.csv")

In [167]:
print(dataset)

      HomeTeam       AwayTeam  Year  HomeGoals  AwayGoals  TotalGoals
0       France         Mexico  1930          4          1           5
1      Uruguay      Argentina  1930          4          2           6
2      Uruguay     Yugoslavia  1930          6          1           7
3    Argentina  United States  1930          6          1           7
4     Paraguay        Belgium  1930          1          0           1
..         ...            ...   ...        ...        ...         ...
895     Brazil     Costa Rica  2018          2          0           2
896     Serbia    Switzerland  2018          1          2           3
897     Serbia         Brazil  2018          0          2           2
898     France           Peru  2018          1          0           1
899     Brazil        Belgium  2018          1          2           3

[900 rows x 6 columns]


In [168]:
df_home = dataset[['HomeTeam', 'HomeGoals', 'AwayGoals']]
df_away = dataset[['AwayTeam', 'HomeGoals', 'AwayGoals']]

df_home = df_home.rename(columns={'HomeTeam':'Team', 'HomeGoals': 'GoalsScored', 'AwayGoals': 'GoalsConceded'})
df_away = df_away.rename(columns={'AwayTeam':'Team', 'HomeGoals': 'GoalsConceded', 'AwayGoals': 'GoalsScored'})

df_team_strength = pd.concat([df_home, df_away], ignore_index=True).groupby(['Team']).mean()

In [169]:
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,1.000000,1.461538
Angola,0.333333,0.666667
Argentina,1.691358,1.148148
Australia,0.812500,1.937500
Austria,1.482759,1.620690
...,...,...
Uruguay,1.553571,1.321429
Wales,0.800000,0.800000
West Germany,2.112903,1.241935
Yugoslavia,1.666667,1.272727


In [170]:
def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        # goals_scored * goals_conceded
        lamb_home = df_team_strength.at[home,'GoalsScored'] * df_team_strength.at[away,'GoalsConceded']
        lamb_away = df_team_strength.at[away,'GoalsScored'] * df_team_strength.at[home,'GoalsConceded']
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0,11): #number of goals home team
            for y in range(0, 11): #number of goals away team
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0, 0)


In [171]:
print(predict_points('England', 'United States'))
print(predict_points('Argentina', 'Mexico'))
print(predict_points('Qatar (H)', 'Ecuador'))

(2.2356147635326007, 0.5922397535606193)
(2.3129151525530505, 0.5378377125059863)
(0, 0)


In [172]:
df_fixture_group_48 = fixture[:48].copy()
df_fixture_knockout = fixture[48:56].copy()
df_fixture_quarter = fixture[56:60].copy()
df_fixture_semi = fixture[60:62].copy()
df_fixture_final = fixture[62:].copy()


In [173]:
df_fixture_knockout


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6
48,Winners Group A,Match 49,Runners-up Group B,2022,,,
49,Winners Group C,Match 50,Runners-up Group D,2022,,,
50,Winners Group D,Match 52,Runners-up Group C,2022,,,
51,Winners Group B,Match 51,Runners-up Group A,2022,,,
52,Winners Group E,Match 53,Runners-up Group F,2022,,,
53,Winners Group G,Match 54,Runners-up Group H,2022,,,
54,Winners Group F,Match 55,Runners-up Group E,2022,,,
55,Winners Group H,Match 56,Runners-up Group G,2022,,,


In [174]:
for group in table:
    teams_in_group = table[group]['Team'].values
    df_fixture_group_6 = df_fixture_group_48[df_fixture_group_48['home'].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        table[group].loc[table[group]['Team'] == home, 'Pts'] += points_home
        table[group].loc[table[group]['Team'] == away, 'Pts'] += points_away

    table[group] = table[group].sort_values('Pts', ascending=False).reset_index()
    table[group] = table[group][['Team', 'Pts']]
    table[group] = table[group].round(0)

In [175]:
for group in table:
    group_winner = table[group].loc[0, 'Team']
    runners_up = table[group].loc[1, 'Team']
    df_fixture_knockout.replace({f'Winners {group}':group_winner,
                                 f'Runners-up {group}':runners_up}, inplace=True)

df_fixture_knockout['winner'] = '?'
df_fixture_knockout


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
48,Netherlands,Match 49,Wales,2022,,,,?
49,Argentina,Match 50,Denmark,2022,,,,?
50,France,Match 52,Poland,2022,,,,?
51,England,Match 51,Senegal,2022,,,,?
52,Germany,Match 53,Belgium,2022,,,,?
53,Brazil,Match 54,Uruguay,2022,,,,?
54,Croatia,Match 55,Spain,2022,,,,?
55,Portugal,Match 56,Switzerland,2022,,,,?


In [177]:
df_fixture_knockout = df_fixture_knockout.replace("Wales", "United States")
df_fixture_knockout = df_fixture_knockout.replace("Denmark", "Australia")
df_fixture_knockout = df_fixture_knockout.replace("Germany", "Morocco")
df_fixture_knockout = df_fixture_knockout.replace("Spain", "Japan")
df_fixture_knockout = df_fixture_knockout.replace("Uruguay", "South Korea")
df_fixture_knockout = df_fixture_knockout.replace("Belgium", "Spain")
df_fixture_knockout

Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
48,Netherlands,Match 49,United States,2022,,,,?
49,Argentina,Match 50,Australia,2022,,,,?
50,France,Match 52,Poland,2022,,,,?
51,England,Match 51,Senegal,2022,,,,?
52,Morocco,Match 53,Spain,2022,,,,?
53,Brazil,Match 54,South Korea,2022,,,,?
54,Croatia,Match 55,Japan,2022,,,,?
55,Portugal,Match 56,Switzerland,2022,,,,?


In [178]:
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, 'winner'] = winner
    return df_fixture_updated


In [179]:
get_winner(df_fixture_knockout)


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
48,Netherlands,Match 49,United States,2022,,,,Netherlands
49,Argentina,Match 50,Australia,2022,,,,Argentina
50,France,Match 52,Poland,2022,,,,France
51,England,Match 51,Senegal,2022,,,,England
52,Morocco,Match 53,Spain,2022,,,,Spain
53,Brazil,Match 54,South Korea,2022,,,,Brazil
54,Croatia,Match 55,Japan,2022,,,,Croatia
55,Portugal,Match 56,Switzerland,2022,,,,Portugal


In [180]:
def update_table(df_fixture_round_1, df_fixture_round_2):
    for index, row in df_fixture_round_1.iterrows():
        winner = df_fixture_round_1.loc[index, 'winner']
        match = df_fixture_round_1.loc[index, 'score']
        df_fixture_round_2.replace({f'Winners {match}':winner}, inplace=True)
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2

In [181]:
update_table(df_fixture_knockout, df_fixture_quarter)


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
56,Spain,Match 58,Brazil,2022,,,,?
57,Netherlands,Match 57,Argentina,2022,,,,?
58,Croatia,Match 60,Portugal,2022,,,,?
59,England,Match 59,France,2022,,,,?


In [182]:
df_fixture_quarter = df_fixture_quarter.replace("Croatia", "Morocco")
df_fixture_quarter = df_fixture_quarter.replace("Spain", "Croatia")
df_fixture_quarter

Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
56,Croatia,Match 58,Brazil,2022,,,,?
57,Netherlands,Match 57,Argentina,2022,,,,?
58,Morocco,Match 60,Portugal,2022,,,,?
59,England,Match 59,France,2022,,,,?


In [183]:
get_winner(df_fixture_quarter)


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
56,Croatia,Match 58,Brazil,2022,,,,Brazil
57,Netherlands,Match 57,Argentina,2022,,,,Netherlands
58,Morocco,Match 60,Portugal,2022,,,,Portugal
59,England,Match 59,France,2022,,,,France


In [184]:
update_table(df_fixture_quarter, df_fixture_semi)


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
60,Netherlands,Match 61,Brazil,2022,,,,?
61,France,Match 62,Portugal,2022,,,,?


In [186]:
df_fixture_semi = df_fixture_semi.replace("Netherlands", "Argentina")
df_fixture_semi = df_fixture_semi.replace("Brazil", "Croatia")
df_fixture_semi = df_fixture_semi.replace("Portugal", "Morocco")

In [187]:
get_winner(df_fixture_semi)


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
60,Argentina,Match 61,Croatia,2022,,,,Argentina
61,France,Match 62,Morocco,2022,,,,France


In [188]:
update_table(df_fixture_semi, df_fixture_final)


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
62,Losers Match 61,Match 63,Losers Match 62,2022,,,,?
63,Argentina,Match 64,France,2022,,,,?


In [189]:
get_winner(df_fixture_final)


Unnamed: 0,home,score,away,year,Unnamed: 4,Unnamed: 5,Unnamed: 6,winner
62,Losers Match 61,Match 63,Losers Match 62,2022,,,,Losers Match 62
63,Argentina,Match 64,France,2022,,,,France
