# Hypothesis test for scores in FIFA World Cup

In [48]:
# Import packages
import pandas as pd
import pingouin
from scipy.stats import mannwhitneyu

In [49]:
# Read the men's and women's international match results from CSV
men_df, women_df = (
    pd.read_csv(csv_path)
    for csv_path in ("men_results.csv", "women_results.csv")
)

In [50]:
# Preview the first five rows of the men's results
men_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament
0,0,1872-11-30,Scotland,England,0,0,Friendly
1,1,1873-03-08,England,Scotland,4,2,Friendly
2,2,1874-03-07,Scotland,England,2,1,Friendly
3,3,1875-03-06,England,Scotland,2,2,Friendly
4,4,1876-03-04,Scotland,England,3,0,Friendly


In [51]:
# Preview the first five rows of the women's results
women_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament
0,0,1969-11-01,Italy,France,1,0,Euro
1,1,1969-11-01,Denmark,England,4,3,Euro
2,2,1969-11-02,England,France,2,0,Euro
3,3,1969-11-02,Italy,Denmark,3,1,Euro
4,4,1975-08-25,Thailand,Australia,3,2,AFC Championship


In [52]:
# Inspect the men's dataset: column names, dtypes and non-null counts
men_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44353 entries, 0 to 44352
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  44353 non-null  int64 
 1   date        44353 non-null  object
 2   home_team   44353 non-null  object
 3   away_team   44353 non-null  object
 4   home_score  44353 non-null  int64 
 5   away_score  44353 non-null  int64 
 6   tournament  44353 non-null  object
dtypes: int64(3), object(4)
memory usage: 2.4+ MB


In [53]:
# Inspect the women's dataset: column names, dtypes and non-null counts
women_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4884 entries, 0 to 4883
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  4884 non-null   int64 
 1   date        4884 non-null   object
 2   home_team   4884 non-null   object
 3   away_team   4884 non-null   object
 4   home_score  4884 non-null   int64 
 5   away_score  4884 non-null   int64 
 6   tournament  4884 non-null   object
dtypes: int64(3), object(4)
memory usage: 267.2+ KB


Both datasets are clean: in each one, every column's non-null count equals the number of rows, so there are no missing values.

In [54]:
# List the distinct tournament names in the men's dataset
men_df["tournament"].unique()

array(['Friendly', 'British Home Championship', 'Évence Coppée Trophy',
       'Copa Lipton', 'Copa Newton', 'Copa Premio Honor Argentino',
       'Copa Premio Honor Uruguayo', 'Copa Roca', 'Copa América',
       'Copa Chevallier Boutell', 'Nordic Championship',
       'Central European International Cup', 'Baltic Cup', 'Balkan Cup',
       'FIFA World Cup', 'Copa Rio Branco',
       'FIFA World Cup qualification', 'Bolivarian Games',
       'CCCF Championship', 'NAFC Championship', 'Copa Oswaldo Cruz',
       'Pan American Championship', 'Copa del Pacífico',
       "Copa Bernardo O'Higgins", 'AFC Asian Cup qualification',
       'Atlantic Cup', 'AFC Asian Cup', 'African Cup of Nations',
       'Copa Paz del Chaco', 'Merdeka Tournament',
       'UEFA Euro qualification', 'UEFA Euro',
       'Windward Islands Tournament',
       'African Cup of Nations qualification', 'Vietnam Independence Cup',
       'Copa Carlos Dittborn', 'Phillip Seaga Cup',
       'CONCACAF Championship', 'Copa Ju

In [55]:
# List the distinct tournament names in the women's dataset
women_df["tournament"].unique()

array(['Euro', 'AFC Championship', 'UEFA Euro qualification',
       'OFC Championship', 'UEFA Euro', 'African Championship',
       'CONCACAF Championship', 'Copa América', 'FIFA World Cup',
       'CONCACAF Invitational Tournament', 'Algarve Cup', 'Olympic Games',
       'Four Nations Tournament', 'African Championship qualification',
       'CONCACAF Gold Cup qualification', 'CONCACAF Gold Cup',
       'AFC Asian Cup qualification', 'AFC Asian Cup', 'Cyprus Cup',
       'Olympic Games qualification', 'Friendly', 'SheBelieves Cup',
       'FIFA World Cup qualification', 'Tournament of Nations',
       'OFC Nations Cup qualification', 'African Cup of Nations',
       'OFC Nations Cup', 'Tournoi de France',
       "Basque Country Women's Cup",
       'African Cup of Nations qualification'], dtype=object)

We need to convert the `date` column to the datetime type. Then we create boolean masks to restrict each dataset to the matches played since `2002-01-01` in the `FIFA World Cup` tournament.

In [56]:
# Parse the `date` column of both datasets from strings into datetimes
for frame in (men_df, women_df):
    frame["date"] = pd.to_datetime(frame["date"])

In [57]:
# Boolean masks selecting the matches played since 2002-01-01.
# The comparison is inclusive (>=) so that a match played exactly on the
# cut-off date is kept, in line with "since 2002-01-01".
START_DATE = pd.Timestamp("2002-01-01")
men_date_mask = men_df["date"] >= START_DATE
women_date_mask = women_df["date"] >= START_DATE

In [58]:
# Boolean masks selecting only the rows whose tournament is the FIFA World Cup
men_fifa_mask = men_df["tournament"].eq("FIFA World Cup")
women_fifa_mask = women_df["tournament"].eq("FIFA World Cup")

In [59]:
# Keep only the rows that satisfy both the date and the tournament filter.
# An explicit .copy() makes each subset an independent DataFrame, so adding
# columns to it later does not raise SettingWithCopyWarning.
men_sub_df = men_df[men_date_mask & men_fifa_mask].copy()
women_sub_df = women_df[women_date_mask & women_fifa_mask].copy()

Now we compute the total number of goals per match by adding the `home` and `away` scores, and store it in a new column `total_score`. To compare the men's and women's data, it is convenient to add a new common column `group` that labels each dataset as `men` or `women`.

In [60]:
# Label every row with its dataset of origin ("men" / "women").
# .assign returns a new DataFrame instead of writing into the filtered
# subset in place, which avoids SettingWithCopyWarning.
men_sub_df = men_sub_df.assign(group="men")
women_sub_df = women_sub_df.assign(group="women")

In [61]:
# Total goals per match = home goals + away goals.
# .assign returns a new DataFrame instead of writing into the filtered
# subset in place, which avoids SettingWithCopyWarning.
men_sub_df = men_sub_df.assign(
    total_score=men_sub_df["home_score"] + men_sub_df["away_score"]
)
women_sub_df = women_sub_df.assign(
    total_score=women_sub_df["home_score"] + women_sub_df["away_score"]
)

In [62]:
# Stack the men's and women's subsets row-wise into one DataFrame,
# renumbering the index from 0
frames_to_stack = [men_sub_df, women_sub_df]
df = pd.concat(frames_to_stack, ignore_index=True, axis=0)

In [63]:
# Preview the first five rows of the combined dataset
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,group,total_score
0,25164,2002-05-31,France,Senegal,0,1,FIFA World Cup,men,1
1,25165,2002-06-01,Germany,Saudi Arabia,8,0,FIFA World Cup,men,8
2,25166,2002-06-01,Republic of Ireland,Cameroon,1,1,FIFA World Cup,men,2
3,25167,2002-06-01,Uruguay,Denmark,1,2,FIFA World Cup,men,3
4,25168,2002-06-02,Argentina,Nigeria,1,0,FIFA World Cup,men,1


In [64]:
# Keep only the goal totals and the men/women label, then spread the totals
# into one column per group as input for the pingouin Mann-Whitney U
# (Wilcoxon-Mann-Whitney) test. Each row belongs to a single group, so the
# other group's column is NaN on that row.
sub_df = df.loc[:, ["total_score", "group"]]
pivot_sub_df = sub_df.pivot(values="total_score", columns="group")

pivot_sub_df.head(n=5)

group,men,women
0,1.0,
1,8.0,
2,2.0,
3,3.0,
4,1.0,


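To compare the two independent samples, we perform the Mann-Whitney U test (also known as the Wilcoxon-Mann-Whitney test) with pingouin. It is a non-parametric test, so it does not assume that the goal totals are normally distributed. The test is right-tailed: the alternative hypothesis is that more goals are scored in women's matches than in men's matches.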

In [65]:
# Right-tailed Wilcoxon-Mann-Whitney test with pingouin: x is the women's
# goal totals, y the men's, and the alternative hypothesis is "greater"
results_pg = pingouin.mwu(
    x=pivot_sub_df["women"],
    y=pivot_sub_df["men"],
    alternative="greater",
)

# The same test with scipy, dropping the NaNs introduced by the pivot
# results_scipy = mannwhitneyu(x=pivot_sub_df["women"].dropna(),
#                              y=pivot_sub_df["men"].dropna(),
#                              alternative="greater")

In [66]:
# Extract the p-value and round it to four decimal places.
# Casting to a built-in float keeps the printed dictionary readable
# (0.0051 instead of np.float64(0.0051) under NumPy 2).
p_val = round(float(results_pg["p-val"].iloc[0]), 4)

The final decision is expressed as either "reject" or "fail to reject" the null hypothesis, based on the **10%** significance level. The p-value and the decision are stored together in the dictionary `result_dict`.

In [67]:
# Decide at the 10% significance level: reject the null hypothesis when the
# p-value does not exceed 0.1, otherwise fail to reject it
result = "reject" if p_val <= 0.1 else "fail to reject"

# Bundle the p-value and the decision into a dictionary and show it
result_dict = {"p_val": p_val, "result": result}
print(result_dict)

{'p_val': 0.0051, 'result': 'reject'}
