<a href="https://colab.research.google.com/github/davidcamilo0710/QATAR_2022_Prediction/blob/master/Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# INDEX
- GROUP STAGE PREDICTION
- KNOCKOUT STAGE PREDICTION

### Importing data and models

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the CSV and model
# files read by the cells below are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Per-team table holding the FIFA rank plus goalkeeper/defense/offense/midfield
# scores; matches() looks teams up in it through the 'team' column.
last_team_scores = pd.read_csv('/content/drive/MyDrive/BigData/Data/last_team_scores.csv')
last_team_scores.tail()

Unnamed: 0,team,date,rank,latest_date,row_number,goalkeeper_score,defense_score,offense_score,midfield_score
27,Switzerland,2023/06/12,19,2023/06/12,1,85.0,79.0,77.0,80.0
28,USA,2023/06/14,11,2023/06/14,1,77.0,76.0,78.0,76.0
29,Ukraine,2023/06/14,22,2023/06/14,1,75.0,75.0,79.0,80.0
30,Uruguay,2023/06/11,15,2023/06/11,1,80.0,81.0,84.0,80.0
31,Wales,2023/06/14,29,2023/06/14,1,74.0,75.0,73.0,79.0


In [None]:
# Per-nation squad ratings ('overall' and 'potential'); matches() uses the
# 'potential' column, keyed by 'nationality_name', to order each fixture.
squad_stats = pd.read_csv('/content/drive/MyDrive/BigData/Data/squad_stats.csv')
squad_stats.tail()

Unnamed: 0,nationality_name,overall,potential
27,Australia,72.55,76.82
28,Tunisia,73.0,76.0
29,Saudi Arabia,71.45,75.27
30,Peru,72.0,74.55
31,IR Iran,70.82,72.36


In [None]:
# Group_stage.csv holds every fixture of the tournament in one table; the
# stages are separated purely by row position:
#   rows 0-47  -> group stage        rows 48-55 -> round of 16
#   rows 56-59 -> quarter-finals     rows 60-61 -> semi-finals
#   row  62    -> final              row  63    -> third-place match
# Each stage gets its own .copy() because later cells edit these frames in
# place (column inserts, cell-by-cell name padding); without the copy those
# edits would be applied to slices of the same parent frame.
group_matches = pd.read_csv('/content/drive/MyDrive/BigData/Data/Group_stage.csv')
round_16 = group_matches.iloc[48:56, :].copy()
quarter_finals = group_matches.iloc[56:60, :].copy()
semi_finals = group_matches.iloc[60:62, :].copy()
final = group_matches.iloc[62:63, :].copy()
second_final = group_matches.iloc[63:64, :].copy()
group_matches = group_matches.iloc[:48, :].copy()
group_matches.tail()

Unnamed: 0,country1,country2,group
43,Peru,Germany,e
44,Ghana,Uruguay,h
45,Korea Republic,Portugal,h
46,Serbia,Switzerland,g
47,Cameroon,Brazil,g


In [None]:
# Model used for the group-stage fixtures.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only
# load model files from a trusted source.
gs_model = joblib.load("/content/drive/MyDrive/BigData/groups_stage_prediction.pkl")

In [None]:
# Model used for the knockout fixtures (see prediction_knockout).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only
# load model files from a trusted source.
ks_model = joblib.load("/content/drive/MyDrive/BigData/knockout_stage_prediction.pkl")

In [None]:
# One row per (team, group) pair, derived from the first team listed in every
# group-stage fixture.
team_group = (
    group_matches
    .drop(['country2'], axis=1)
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={"country1": "team"})
)
team_group.head(5)

Unnamed: 0,team,group
0,Qatar,a
1,Senegal,a
2,England,b
3,USA,b
4,France,d


### Declaring important functions

In [None]:
def matches(g_matches):
    """Build the model feature table for a set of fixtures.

    Each fixture is re-ordered into (Team1, Team2):

    * if the squads' ``potential`` differs by more than 2, the side with the
      higher potential becomes Team1;
    * otherwise the side with the numerically larger ``rank`` becomes Team1,
      and country2 is used when the ranks are equal or missing.

    NOTE(review): a larger rank value being placed first looks surprising
    next to the potential rule -- confirm it matches the ordering the models
    were trained with before changing it.

    Parameters
    ----------
    g_matches : pandas.DataFrame
        Fixtures with ``country1`` and ``country2`` columns.  The frame is
        only read; it is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per fixture with columns Team1, Team2, the two FIFA ranks,
        the two goalkeeper scores, then defense/offense/midfield for Team1
        followed by the same three for Team2.  Teams missing from
        ``last_team_scores`` get NaN features.
    """
    # Build the lookup tables once instead of re-indexing on every use.
    potential = squad_stats.set_index('nationality_name')['potential']
    scores = last_team_scores.set_index('team')

    # Work on a private copy: inserting the helper columns into the caller's
    # frame made a second call on the same frame fail ("already exists").
    fixtures = g_matches[['country1', 'country2']].copy()
    fixtures['potential1'] = fixtures['country1'].map(potential)
    fixtures['potential2'] = fixtures['country2'].map(potential)
    fixtures['rank1'] = fixtures['country1'].map(scores['rank'])
    fixtures['rank2'] = fixtures['country2'].map(scores['rank'])

    pred_set = []
    for row in fixtures.itertuples(index=False):
        gap = row.potential1 - row.potential2
        # Comparisons against NaN are False, so unknown teams fall through to
        # the final branch exactly as before.
        if gap > 2:
            first, second = row.country1, row.country2
        elif gap < -2:
            first, second = row.country2, row.country1
        elif row.rank1 > row.rank2:
            first, second = row.country1, row.country2
        else:
            first, second = row.country2, row.country1
        pred_set.append({'Team1': first, 'Team2': second})

    # Explicit columns keep an empty fixture list from producing a frame
    # without Team1/Team2.
    pred_set = pd.DataFrame(pred_set, columns=['Team1', 'Team2'])

    # (output column, team column, source score) -- the order here is the
    # column order of the returned frame.
    feature_spec = [
        ('Team1_FIFA_RANK', 'Team1', 'rank'),
        ('Team2_FIFA_RANK', 'Team2', 'rank'),
        ('Team1_Goalkeeper_Score', 'Team1', 'goalkeeper_score'),
        ('Team2_Goalkeeper_Score', 'Team2', 'goalkeeper_score'),
        ('Team1_Defense', 'Team1', 'defense_score'),
        ('Team1_Offense', 'Team1', 'offense_score'),
        ('Team1_Midfield', 'Team1', 'midfield_score'),
        ('Team2_Defense', 'Team2', 'defense_score'),
        ('Team2_Offense', 'Team2', 'offense_score'),
        ('Team2_Midfield', 'Team2', 'midfield_score'),
    ]
    for column, side, score in feature_spec:
        pred_set[column] = pred_set[side].map(scores[score])
    return pred_set

In [None]:
 def print_results(dataset, y_pred, matches, proba):
     """Print each fixture's predicted outcome and return the results table.

     Parameters
     ----------
     dataset : pandas.DataFrame
         Feature table whose first two columns are Team1 and Team2.
     y_pred : sequence
         Predicted class per fixture: 2 is a draw, 1 means Team1 wins and any
         other value means Team2 wins.
     matches : pandas.DataFrame
         Fixture table; the first two columns are the names printed as
         "A vs. B" and the ``group`` column labels the fixture.
     proba : sequence of sequences
         Class probabilities per fixture, read as [Team2 wins, Team1 wins]
         with an optional third entry for a draw.

     Returns
     -------
     pandas.DataFrame
         Columns ``group`` and ``result``; ``result`` is the winning team's
         name or the string 'Draw'.
     """
     outcomes = []
     for i in range(dataset.shape[0]):
         fixture = matches.iloc[i, 0] + " vs. " + matches.iloc[i, 1]
         team1 = dataset.iloc[i, 0]
         team2 = dataset.iloc[i, 1]

         print()
         if y_pred[i] == 2:
             print(fixture + " => Draw")
             outcome = 'Draw'
         elif y_pred[i] == 1:
             print(fixture + " => Winner: " + team1)
             outcome = team1
         else:
             print(fixture + " => Winner: " + team2)
             outcome = team2
         outcomes.append({'result': outcome})

         # The number of probability columns decides what is printed; this
         # replaces a bare `except:` that relied on an IndexError when the
         # draw column was absent and could hide unrelated errors.
         probs = proba[i]
         if len(probs) > 1:
             print('Probability of ' + team1 + ' winning: ', '%.3f'%(probs[1]))
         if len(probs) > 2:
             print('Probability of Draw: ', '%.3f'%(probs[2]))
         print('Probability of ' + team2 + ' winning: ', '%.3f'%(probs[0]))
         print("")

     # Explicit column so an empty input still yields a 'result' column.
     outcomes = pd.DataFrame(outcomes, columns=['result'])
     # concat(axis=1) aligns on the index; reset it so a fixture frame with a
     # non-default index cannot be paired with the wrong outcomes.
     groups = matches['group'].reset_index(drop=True)
     return pd.concat([groups, outcomes], axis=1)

In [None]:
def winner_to_match(round, prev_match):
    """Replace a round's placeholder codes with the teams that qualified.

    Parameters
    ----------
    round : pandas.DataFrame
        Fixtures whose ``country1``/``country2`` hold codes that appear in
        ``prev_match['group']``.  The frame is only read; it is not modified.
    prev_match : pandas.DataFrame
        Results of the previous stage with ``group`` (code) and ``result``
        (team name) columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index in which ``country1`` and
        ``country2`` (the first two columns) hold the mapped team names.
        Codes with no entry in ``prev_match`` become NaN.
    """
    # Build the code -> team lookup once and reuse it for both sides.
    lookup = prev_match.set_index('group')['result']
    home = round['country1'].map(lookup)
    away = round['country2'].map(lookup)

    # drop() returns a new frame, so the inserts below never touch the
    # caller's frame (the previous version added 'c1'/'c2' to it).
    advanced = round.drop(['country1', 'country2'], axis=1)
    advanced.insert(0, 'country1', home)
    advanced.insert(1, 'country2', away)
    return advanced.reset_index(drop=True)

In [None]:
def prediction_knockout(round):
    """Predict every fixture of one knockout round and print the outcome.

    Builds the feature table with ``matches``, scores it with ``ks_model``
    (class prediction first, then class probabilities) and passes everything
    to ``print_results``, whose results table is returned.
    """
    features = matches(round)
    predicted = ks_model.predict(features)
    probabilities = ks_model.predict_proba(features)
    return print_results(features, predicted, round, probabilities)

In [None]:
def center_str(round, width=13):
    """Centre the strings in the first two columns of ``round`` in place.

    Every value in columns 0 and 1 is padded with spaces to ``width``
    characters.  When the padding is odd, the extra space goes on the right.
    Values already ``width`` characters or longer are left unchanged; the
    previous lookup-table version indexed the table with a negative number
    in that case and prepended stray spaces (e.g. for "Korea Republic"), and
    raised IndexError for names shorter than three characters.

    Parameters
    ----------
    round : pandas.DataFrame
        Frame whose first two columns contain strings.  Modified in place.
    width : int, default 13
        Target width of each padded name.

    Returns
    -------
    pandas.DataFrame
        The same ``round`` object, for convenient reassignment.
    """
    def _pad(name):
        gap = max(width - len(name), 0)
        left = gap // 2
        return ' ' * left + name + ' ' * (gap - left)

    for col in range(2):
        round.iloc[:, col] = [_pad(name) for name in round.iloc[:, col]]
    return round

In [None]:
def center2(a, width=29):
    """Return ``a`` centred with spaces in a field of ``width`` characters.

    When the padding is odd, the extra space goes on the right.  Strings that
    are already ``width`` characters or longer are returned unchanged; the
    previous lookup-table version indexed the table with a negative number
    in that case and added up to 20 stray spaces on each side.

    Parameters
    ----------
    a : str
        Text to centre.
    width : int, default 29
        Target field width.

    Returns
    -------
    str
        The padded string.
    """
    gap = max(width - len(a), 0)
    left = gap // 2
    return ' ' * left + a + ' ' * (gap - left)

# GROUP STAGE PREDICTION

In [None]:
# Build the feature table for the 48 group-stage fixtures.
# NOTE(review): in the sample output below the rows for Ghana, Serbia and
# Cameroon have NaN rank and scores -- presumably those names are missing
# from (or spelled differently in) last_team_scores; verify the source data.
dataset_groups = matches(group_matches)
dataset_groups.tail()

Unnamed: 0,Team1,Team2,Team1_FIFA_RANK,Team2_FIFA_RANK,Team1_Goalkeeper_Score,Team2_Goalkeeper_Score,Team1_Defense,Team1_Offense,Team1_Midfield,Team2_Defense,Team2_Offense,Team2_Midfield
43,Germany,Peru,16.0,32.0,90.0,74.0,84.0,83.0,88.0,75.0,73.0,77.0
44,Uruguay,Ghana,15.0,,80.0,,81.0,84.0,80.0,,,
45,Portugal,Korea Republic,6.0,23.0,82.0,75.0,85.0,86.0,85.0,73.0,80.0,74.0
46,Switzerland,Serbia,19.0,,85.0,,79.0,77.0,80.0,,,
47,Brazil,Cameroon,5.0,,89.0,,85.0,86.0,86.0,,,


In [None]:
# Score every group-stage fixture with the group-stage model, print the
# outcome of each match and keep the (group, result) table in `results`.
prediction_groups = gs_model.predict(dataset_groups)
proba = gs_model.predict_proba(dataset_groups)
results = print_results(dataset_groups, prediction_groups, group_matches, proba)


Qatar vs. Ecuador => Winner: Qatar
Probability of Qatar winning:  0.622
Probability of Draw:  0.185
Probability of Ecuador winning:  0.193


Senegal vs. Netherlands => Winner: Netherlands
Probability of Netherlands winning:  0.518
Probability of Draw:  0.288
Probability of Senegal winning:  0.194


England vs. IR Iran => Winner: England
Probability of England winning:  0.687
Probability of Draw:  0.192
Probability of IR Iran winning:  0.121


USA vs. Wales => Winner: USA
Probability of USA winning:  0.497
Probability of Draw:  0.249
Probability of Wales winning:  0.254


France vs. Australia => Winner: France
Probability of France winning:  0.707
Probability of Draw:  0.164
Probability of Australia winning:  0.128


Denmark vs. Tunisia => Winner: Denmark
Probability of Denmark winning:  0.459
Probability of Draw:  0.171
Probability of Tunisia winning:  0.369


Mexico vs. Poland => Winner: Poland
Probability of Poland winning:  0.398
Probability of Draw:  0.285
Probability of Mexico wi

In [None]:
# Group-stage points table: 3 points for a predicted win and 1 point to each
# side for a predicted draw (draws previously scored nothing for either team).
# print_results() emits one result row per fixture row, in the same order, so
# `results` lines up with `group_matches` and a draw can be credited to both
# teams of that fixture.
points = dict.fromkeys(team_group['team'], 0)
# Strip padding so the lookup still works if the names were already centred
# in place by center_str() before this cell is re-run.
home_teams = group_matches['country1'].str.strip()
away_teams = group_matches['country2'].str.strip()
for home, away, outcome in zip(home_teams, away_teams, results['result']):
    if outcome == 'Draw':
        for side in (home, away):
            if side in points:
                points[side] += 1
    elif outcome in points:
        points[outcome] += 3
team_group['points'] = team_group['team'].map(points)

**Points Table:** only the top two teams from each group advance to the knockout stage.

In [None]:
# Show the points per team, organised by group. Each (group, team) pair occurs
# once in team_group, so mean() simply carries the points value through.
print(team_group.groupby(['group','team']).mean().astype(int))

                      points
group team                  
a     Ecuador              0
      Netherlands          9
      Qatar                3
      Senegal              6
b     England              9
      IR Iran              0
      USA                  6
      Wales                3
c     Argentina            9
      Mexico               3
      Poland               6
      Saudi Arabia         0
d     Australia            0
      Denmark              6
      France               9
      Tunisia              3
e     Germany              9
      Japan                3
      Peru                 0
      Spain                6
f     Belgium              9
      Canada               0
      Croatia              6
      Morocco              3
g     Brazil               9
      Cameroon             0
      Serbia               3
      Switzerland          6
h     Ghana                3
      Korea Republic       0
      Portugal             9
      Uruguay              6


# KNOCKOUT STAGE PREDICTION
**Round of 16**

In [None]:
# Rank the teams inside each group by points (highest first) and keep the top
# two, labelled '1<group>' and '2<group>' (e.g. '1a', '2a').  This replaces a
# rule that only worked when the qualifiers had exactly 9 and 6 points and
# derived the position from float arithmetic on the points.
# Teams level on points keep their existing table order: no goal-difference
# tie-break is available here.
ranked = team_group.sort_values(['group', 'points'], ascending=[True, False])
ranked['position'] = ranked.groupby('group').cumcount() + 1
round_of_16 = ranked[ranked['position'] <= 2].reset_index(drop=True)
round_of_16['group'] = round_of_16['position'].astype(str) + round_of_16['group']
round_of_16 = round_of_16.drop(columns='position').rename(columns = {"team":"result"})

# NOTE: round_16 is overwritten with resolved team names, so re-running this
# cell requires re-running the cell that loads Group_stage.csv first.
round_16 = winner_to_match(round_16, round_of_16)
results_round_16 = prediction_knockout(round_16)


Netherlands vs. USA => Winner: Netherlands
Probability of Netherlands winning:  0.835
Probability of USA winning:  0.165


Argentina vs. Denmark => Winner: Argentina
Probability of Argentina winning:  0.775
Probability of Denmark winning:  0.225


Germany vs. Croatia => Winner: Germany
Probability of Germany winning:  0.603
Probability of Croatia winning:  0.397


Brazil vs. Uruguay => Winner: Brazil
Probability of Brazil winning:  0.832
Probability of Uruguay winning:  0.168


England vs. Senegal => Winner: England
Probability of England winning:  0.820
Probability of Senegal winning:  0.180


France vs. Poland => Winner: France
Probability of France winning:  0.803
Probability of Poland winning:  0.197


Belgium vs. Spain => Winner: Spain
Probability of Spain winning:  0.606
Probability of Belgium winning:  0.394


Portugal vs. Switzerland => Winner: Portugal
Probability of Portugal winning:  0.823
Probability of Switzerland winning:  0.177



**Quarterfinals**

In [None]:
# Fill the quarter-final fixtures with the round-of-16 winners and predict them.
# NOTE: quarter_finals is overwritten with team names, so this cell is not
# re-runnable without reloading the fixture file first.
quarter_finals = winner_to_match(quarter_finals, results_round_16)
results_quarter_finals = prediction_knockout(quarter_finals)


Netherlands vs. Argentina => Winner: Argentina
Probability of Netherlands winning:  0.383
Probability of Argentina winning:  0.617


Germany vs. Brazil => Winner: Germany
Probability of Germany winning:  0.516
Probability of Brazil winning:  0.484


England vs. France => Winner: France
Probability of England winning:  0.419
Probability of France winning:  0.581


Spain vs. Portugal => Winner: Spain
Probability of Spain winning:  0.672
Probability of Portugal winning:  0.328



**Semi-finals**

In [None]:
# Fill the semi-final fixtures with the quarter-final winners and predict them.
# Despite its name, `results_finals` holds the semi-final results, i.e. the
# two finalists.
semi_finals = winner_to_match(semi_finals, results_quarter_finals)
results_finals = prediction_knockout(semi_finals)


Argentina vs. Germany => Winner: Argentina
Probability of Germany winning:  0.344
Probability of Argentina winning:  0.656


France vs. Spain => Winner: France
Probability of Spain winning:  0.336
Probability of France winning:  0.664



**Final**

In [None]:
# Fill the final with the two semi-final winners and predict the champion.
final = winner_to_match(final, results_finals)
winner = prediction_knockout(final)


Argentina vs. France => Winner: France
Probability of France winning:  0.567
Probability of Argentina winning:  0.433



**Third place**

In [None]:
# Runner-up: the semi-final winner that did not win the final.
second = results_finals[~results_finals.result.isin(winner.result)]
# Semi-final losers: quarter-final winners that did not win a semi-final.
# .copy() makes the relabelling below act on an independent frame instead of
# on a boolean-filtered selection of results_quarter_finals.
results_finals_3 = results_quarter_finals[~results_quarter_finals.result.isin(results_finals.result)].copy()
# Relabel the two losers so winner_to_match can place them in the third-place
# fixture. Presumably 'z1'/'z2' are the placeholder codes used by that row of
# the fixture file -- verify against Group_stage.csv.
results_finals_3.iloc[0, 0]='z1'
results_finals_3.iloc[1, 0]='z2'
second_final = winner_to_match(second_final, results_finals_3)
third = prediction_knockout(second_final)


Germany vs. Spain => Winner: Germany
Probability of Germany winning:  0.547
Probability of Spain winning:  0.453



**Printing Tournament Table**

In [None]:
# Pad the two team-name columns of every stage so the bracket printed below
# lines up. center_str edits each frame in place and returns it.
# NOTE(review): group_matches is padded as well, although the bracket cell
# below does not read it.
round_16 = center_str(round_16)
quarter_finals = center_str(quarter_finals)
semi_finals = center_str(semi_finals)
final = center_str(final)
group_matches = center_str(group_matches)

In [None]:
# Draw the knockout bracket as text. The left half shows round-of-16 fixtures
# 0-3 and the right half fixtures 4-7; each following column holds the
# quarter-finalists, the semi-finalists and, in the centre, the final.
# The layout relies on every team name having been padded by center_str
# beforehand. The last three lines print the medal winners through center2.
print(round_16.iloc[0, 0]+'━━━━┓                                                                                                                             ┏━━━━'+round_16.iloc[4, 0])
print('                 ┃                                                                                                                             ┃')
print('                 ┃━━━━'+quarter_finals.iloc[0, 0]+'━━━━┓                                                                                 ┏━━━━'+quarter_finals.iloc[2, 0]+'━━━━┃')
print('                 ┃                     ┃                                                                                 ┃                     ┃')
print(round_16.iloc[0, 1]+'━━━━┛                     ┃                                                                                 ┃                     ┗━━━━'+round_16.iloc[4, 1])
print('                                       ┃━━━━'+semi_finals.iloc[0, 0]+'━━━━┓                                     ┏━━━━'+semi_finals.iloc[1, 0]+'━━━━┃')
print(round_16.iloc[1, 0]+'━━━━┓                     ┃                     ┃                                     ┃                     ┃                     ┏━━━━'+round_16.iloc[5, 0])
print('                 ┃                     ┃                     ┃                                     ┃                     ┃                     ┃')
print('                 ┃━━━━'+quarter_finals.iloc[0, 1]+'━━━━┛                     ┃                                     ┃                     ┗━━━━'+quarter_finals.iloc[2, 1]+'━━━━┃')
print('                 ┃                                           ┃                                     ┃                                           ┃')
print(round_16.iloc[1, 1]+'━━━━┛                                           ┃                                     ┃                                           ┗━━━━'+round_16.iloc[5, 1])
print('                                                             ┃━━━━'+final.iloc[0, 0]+'vs.'+final.iloc[0, 1]+'━━━━┃')
print(round_16.iloc[2, 0]+'━━━━┓                                           ┃                                     ┃                                           ┏━━━━'+round_16.iloc[6, 0])
print('                 ┃                                           ┃                                     ┃                                           ┃')
print('                 ┃━━━━'+quarter_finals.iloc[1, 0]+'━━━━┓                     ┃                                     ┃                     ┏━━━━'+quarter_finals.iloc[3, 0]+'━━━━┃')
print('                 ┃                     ┃                     ┃                                     ┃                     ┃                     ┃')
print(round_16.iloc[2, 1]+'━━━━┛                     ┃                     ┃                                     ┃                     ┃                     ┗━━━━'+round_16.iloc[6, 1])
print('                                       ┃━━━━'+semi_finals.iloc[0, 1]+'━━━━┛                                     ┗━━━━'+semi_finals.iloc[1, 1]+'━━━━┃')
print(round_16.iloc[3, 0]+'━━━━┓                     ┃                                                                                 ┃                     ┏━━━━'+round_16.iloc[7, 0])
print('                 ┃                     ┃                                                                                 ┃                     ┃')
print('                 ┃━━━━'+quarter_finals.iloc[1, 1]+'━━━━┛                                                                                 ┗━━━━'+quarter_finals.iloc[3, 1]+'━━━━┃')
print('                 ┃                                                                                                                             ┃')
print(round_16.iloc[3, 1]+'━━━━┛                                                                                                                             ┗━━━━'+round_16.iloc[7, 1])
print("                                                                 "+center2("\U0001F947"+winner.iloc[0, 1]))
print("                                                                 "+center2("\U0001F948"+second.iloc[0, 1]))
print("                                                                 "+center2("\U0001F949"+third.iloc[0, 1]))

 Netherlands ━━━━┓                                                                                                                             ┏━━━━   England   
                 ┃                                                                                                                             ┃
                 ┃━━━━ Netherlands ━━━━┓                                                                                 ┏━━━━   England   ━━━━┃
                 ┃                     ┃                                                                                 ┃                     ┃
     USA     ━━━━┛                     ┃                                                                                 ┃                     ┗━━━━   Senegal   
                                       ┃━━━━  Argentina  ━━━━┓                                     ┏━━━━   France    ━━━━┃
  Argentina  ━━━━┓                     ┃                     ┃                                     ┃                  