In [284]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from itertools import product
from sklearn.feature_selection import VarianceThreshold
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import squareform
from scipy.stats import entropy
import seaborn as sns
import matplotlib.pyplot as plt

In [285]:
# 1. Load data
events = pd.read_csv('data/events.csv')
games_info = pd.read_csv('data/ginf.csv')

In [286]:
events

Unnamed: 0,id_odsp,id_event,sort_order,time,text,event_type,event_type2,side,event_team,opponent,player,player2,player_in,player_out,shot_place,shot_outcome,is_goal,location,bodypart,assist_method,situation,fast_break
0,UFot0hit/,UFot0hit1,1,2,Attempt missed. Mladen Petric (Hamburg) left f...,1,12.0,2,Hamburg SV,Borussia Dortmund,mladen petric,gokhan tore,,,6.0,2.0,0,9.0,2.0,1,1.0,0
1,UFot0hit/,UFot0hit2,2,4,"Corner, Borussia Dortmund. Conceded by Dennis...",2,,1,Borussia Dortmund,Hamburg SV,dennis diekmeier,dennis diekmeier,,,,,0,,,0,,0
2,UFot0hit/,UFot0hit3,3,4,"Corner, Borussia Dortmund. Conceded by Heiko ...",2,,1,Borussia Dortmund,Hamburg SV,heiko westermann,heiko westermann,,,,,0,,,0,,0
3,UFot0hit/,UFot0hit4,4,7,Foul by Sven Bender (Borussia Dortmund).,3,,1,Borussia Dortmund,Hamburg SV,sven bender,,,,,,0,,,0,,0
4,UFot0hit/,UFot0hit5,5,7,Gokhan Tore (Hamburg) wins a free kick in the ...,8,,2,Hamburg SV,Borussia Dortmund,gokhan tore,,,,,,0,2.0,,0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
941004,z5L2OT5E/,z5L2OT5E123,123,92,Lucas Torreira (Sampdoria) wins a free kick in...,8,,2,Sampdoria,Atalanta,lucas torreira,,,,,,0,2.0,,0,,0
941005,z5L2OT5E/,z5L2OT5E124,124,93,"Corner, Sampdoria. Conceded by Andrea Masiello.",2,,2,Sampdoria,Atalanta,,,,,,,0,,,0,,0
941006,z5L2OT5E/,z5L2OT5E125,125,93,Attempt missed. Fabio Quagliarella (Sampdoria)...,1,12.0,2,Sampdoria,Atalanta,fabio quagliarella,lucas torreira,,,8.0,2.0,0,9.0,1.0,1,3.0,0
941007,z5L2OT5E/,z5L2OT5E126,126,94,Alberto Grassi (Atalanta) wins a free kick on ...,8,,1,Atalanta,Sampdoria,alberto grassi,,,,,,0,4.0,,0,,0


In [287]:
games_info

Unnamed: 0,id_odsp,link_odsp,adv_stats,date,league,season,country,ht,at,fthg,ftag,odd_h,odd_d,odd_a,odd_over,odd_under,odd_bts,odd_bts_n
0,UFot0hit/,/soccer/germany/bundesliga-2011-2012/dortmund-...,True,2011-08-05,D1,2012,germany,Borussia Dortmund,Hamburg SV,3,1,1.56,4.41,7.42,,,,
1,Aw5DflLH/,/soccer/germany/bundesliga-2011-2012/augsburg-...,True,2011-08-06,D1,2012,germany,FC Augsburg,SC Freiburg,2,2,2.36,3.60,3.40,,,,
2,bkjpaC6n/,/soccer/germany/bundesliga-2011-2012/werder-br...,True,2011-08-06,D1,2012,germany,Werder Bremen,Kaiserslautern,2,0,1.83,4.20,4.80,,,,
3,CzPV312a/,/soccer/france/ligue-1-2011-2012/paris-sg-lori...,True,2011-08-06,F1,2012,france,Paris Saint-Germain,Lorient,0,1,1.55,4.50,9.40,,,,
4,GUOdmtII/,/soccer/france/ligue-1-2011-2012/caen-valencie...,True,2011-08-06,F1,2012,france,Caen,Valenciennes,1,0,2.50,3.40,3.45,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10107,xAkY8l6R/,/soccer/italy/serie-a/genoa-crotone-xAkY8l6R/,True,2017-01-22,I1,2017,italy,Genoa,Crotone,2,2,1.97,4.35,8.00,1.95,2.03,2.03,1.86
10108,xSU9scI9/,/soccer/england/premier-league/chelsea-hull-ci...,True,2017-01-22,E0,2017,england,Chelsea,Hull,2,0,1.19,8.50,20.00,1.54,2.68,2.40,1.66
10109,xY7uZwOI/,/soccer/france/ligue-1/monaco-lorient-xY7uZwOI/,True,2017-01-22,F1,2017,france,AS Monaco,Lorient,4,0,1.32,6.24,11.50,1.53,3.08,1.80,2.25
10110,YyeGxMX8/,/soccer/spain/laliga/betis-gijon-YyeGxMX8/,True,2017-01-22,SP1,2017,spain,Real Betis,Sporting Gijon,0,0,1.74,4.07,5.90,2.20,1.89,2.05,1.86


In [288]:
# 2. Filter relevant columns
#    Incorporate shot_place, shot_outcome, side, etc.
relevant_columns = [
    'event_team',
    'event_type',
    'event_type2',
    'location',
    'assist_method',
    'situation',
    'bodypart',
    'shot_place',
    'shot_outcome'
]
events = events[relevant_columns]
events

Unnamed: 0,event_team,event_type,event_type2,location,assist_method,situation,bodypart,shot_place,shot_outcome
0,Hamburg SV,1,12.0,9.0,1,1.0,2.0,6.0,2.0
1,Borussia Dortmund,2,,,0,,,,
2,Borussia Dortmund,2,,,0,,,,
3,Borussia Dortmund,3,,,0,,,,
4,Hamburg SV,8,,2.0,0,,,,
...,...,...,...,...,...,...,...,...,...
941004,Sampdoria,8,,2.0,0,,,,
941005,Sampdoria,2,,,0,,,,
941006,Sampdoria,1,12.0,9.0,1,3.0,1.0,8.0,2.0
941007,Atalanta,8,,4.0,0,,,,


In [289]:
# Drop rows with missing critical values
critical_columns = ['event_team', 'event_type']
events = events.dropna(subset=critical_columns)
events

Unnamed: 0,event_team,event_type,event_type2,location,assist_method,situation,bodypart,shot_place,shot_outcome
0,Hamburg SV,1,12.0,9.0,1,1.0,2.0,6.0,2.0
1,Borussia Dortmund,2,,,0,,,,
2,Borussia Dortmund,2,,,0,,,,
3,Borussia Dortmund,3,,,0,,,,
4,Hamburg SV,8,,2.0,0,,,,
...,...,...,...,...,...,...,...,...,...
941004,Sampdoria,8,,2.0,0,,,,
941005,Sampdoria,2,,,0,,,,
941006,Sampdoria,1,12.0,9.0,1,3.0,1.0,8.0,2.0
941007,Atalanta,8,,4.0,0,,,,


In [290]:
# One-hot encode categorical columns
categorical_columns = [
    'event_type',
    'event_type2',
    'location',
    'assist_method',
    'situation',
    'bodypart',
    'shot_place',
    'shot_outcome'
]
encoder = OneHotEncoder(sparse_output=False)
encoded = encoder.fit_transform(events[categorical_columns])

In [291]:
# Create a DataFrame with the encoded features
encoded_feature_names = encoder.get_feature_names_out(categorical_columns)
encoded_features = pd.DataFrame(encoded, columns=encoded_feature_names)
encoded_features

Unnamed: 0,event_type_1,event_type_2,event_type_3,event_type_4,event_type_5,event_type_6,event_type_7,event_type_8,event_type_9,event_type_10,event_type_11,event_type2_12.0,event_type2_13.0,event_type2_14.0,event_type2_15.0,event_type2_nan,location_1.0,location_2.0,location_3.0,location_4.0,location_5.0,location_6.0,location_7.0,location_8.0,location_9.0,location_10.0,location_11.0,location_12.0,location_13.0,location_14.0,location_15.0,location_16.0,location_17.0,location_18.0,location_19.0,location_nan,assist_method_0,assist_method_1,assist_method_2,assist_method_3,assist_method_4,situation_1.0,situation_2.0,situation_3.0,situation_4.0,situation_nan,bodypart_1.0,bodypart_2.0,bodypart_3.0,bodypart_nan,shot_place_1.0,shot_place_2.0,shot_place_3.0,shot_place_4.0,shot_place_5.0,shot_place_6.0,shot_place_7.0,shot_place_8.0,shot_place_9.0,shot_place_10.0,shot_place_11.0,shot_place_12.0,shot_place_13.0,shot_place_nan,shot_outcome_1.0,shot_outcome_2.0,shot_outcome_3.0,shot_outcome_4.0,shot_outcome_nan
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
941004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
941005,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
941006,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
941007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [292]:
# Combine encoded features with the team column
data_encoded = pd.concat([events['event_team'].reset_index(drop=True), encoded_features], axis=1)
data_encoded

Unnamed: 0,event_team,event_type_1,event_type_2,event_type_3,event_type_4,event_type_5,event_type_6,event_type_7,event_type_8,event_type_9,event_type_10,event_type_11,event_type2_12.0,event_type2_13.0,event_type2_14.0,event_type2_15.0,event_type2_nan,location_1.0,location_2.0,location_3.0,location_4.0,location_5.0,location_6.0,location_7.0,location_8.0,location_9.0,location_10.0,location_11.0,location_12.0,location_13.0,location_14.0,location_15.0,location_16.0,location_17.0,location_18.0,location_19.0,location_nan,assist_method_0,assist_method_1,assist_method_2,assist_method_3,assist_method_4,situation_1.0,situation_2.0,situation_3.0,situation_4.0,situation_nan,bodypart_1.0,bodypart_2.0,bodypart_3.0,bodypart_nan,shot_place_1.0,shot_place_2.0,shot_place_3.0,shot_place_4.0,shot_place_5.0,shot_place_6.0,shot_place_7.0,shot_place_8.0,shot_place_9.0,shot_place_10.0,shot_place_11.0,shot_place_12.0,shot_place_13.0,shot_place_nan,shot_outcome_1.0,shot_outcome_2.0,shot_outcome_3.0,shot_outcome_4.0,shot_outcome_nan
0,Hamburg SV,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,Borussia Dortmund,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,Borussia Dortmund,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
3,Borussia Dortmund,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,Hamburg SV,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
941004,Sampdoria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
941005,Sampdoria,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
941006,Sampdoria,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
941007,Atalanta,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [293]:
# Group by team
team_aggregated = data_encoded.groupby('event_team').sum().reset_index()
team_aggregated

Unnamed: 0,event_team,event_type_1,event_type_2,event_type_3,event_type_4,event_type_5,event_type_6,event_type_7,event_type_8,event_type_9,event_type_10,event_type_11,event_type2_12.0,event_type2_13.0,event_type2_14.0,event_type2_15.0,event_type2_nan,location_1.0,location_2.0,location_3.0,location_4.0,location_5.0,location_6.0,location_7.0,location_8.0,location_9.0,location_10.0,location_11.0,location_12.0,location_13.0,location_14.0,location_15.0,location_16.0,location_17.0,location_18.0,location_19.0,location_nan,assist_method_0,assist_method_1,assist_method_2,assist_method_3,assist_method_4,situation_1.0,situation_2.0,situation_3.0,situation_4.0,situation_nan,bodypart_1.0,bodypart_2.0,bodypart_3.0,bodypart_nan,shot_place_1.0,shot_place_2.0,shot_place_3.0,shot_place_4.0,shot_place_5.0,shot_place_6.0,shot_place_7.0,shot_place_8.0,shot_place_9.0,shot_place_10.0,shot_place_11.0,shot_place_12.0,shot_place_13.0,shot_place_nan,shot_outcome_1.0,shot_outcome_2.0,shot_outcome_3.0,shot_outcome_4.0,shot_outcome_nan
0,AC Ajaccio,1106.0,449.0,1593.0,238.0,0.0,12.0,307.0,1534.0,365.0,96.0,15.0,762.0,365.0,17.0,4.0,4567.0,285.0,850.0,336.0,212.0,187.0,9.0,9.0,10.0,71.0,13.0,92.0,12.0,27.0,15.0,457.0,24.0,20.0,3.0,8.0,3075.0,4953.0,490.0,198.0,43.0,31.0,913.0,86.0,74.0,33.0,4609.0,566.0,374.0,166.0,4609.0,19.0,245.0,86.0,80.0,126.0,89.0,21.0,136.0,122.0,90.0,25.0,28.0,30.0,4618.0,370.0,466.0,245.0,21.0,4613.0
1,AC Milan,3065.0,1225.0,2677.0,499.0,3.0,18.0,593.0,2840.0,543.0,128.0,42.0,2200.0,543.0,39.0,11.0,8840.0,673.0,1484.0,784.0,344.0,339.0,10.0,28.0,35.0,215.0,34.0,256.0,29.0,93.0,51.0,1465.0,23.0,19.0,1.0,22.0,5728.0,9433.0,1556.0,477.0,61.0,106.0,2615.0,144.0,219.0,87.0,8568.0,1936.0,788.0,341.0,8568.0,69.0,782.0,267.0,255.0,308.0,219.0,48.0,357.0,305.0,224.0,63.0,68.0,72.0,8596.0,1030.0,1196.0,782.0,48.0,8577.0
2,AJ Auxerre,446.0,178.0,558.0,89.0,0.0,3.0,104.0,469.0,94.0,35.0,8.0,302.0,94.0,5.0,0.0,1583.0,108.0,237.0,123.0,70.0,54.0,6.0,4.0,4.0,48.0,10.0,33.0,10.0,18.0,1.0,173.0,5.0,8.0,1.0,2.0,1069.0,1682.0,174.0,92.0,16.0,20.0,364.0,18.0,41.0,23.0,1538.0,199.0,178.0,69.0,1538.0,6.0,96.0,28.0,39.0,56.0,36.0,6.0,48.0,47.0,48.0,10.0,8.0,16.0,1540.0,156.0,188.0,96.0,6.0,1538.0
3,AS Monaco,1665.0,727.0,1666.0,243.0,0.0,8.0,385.0,1736.0,285.0,106.0,29.0,1219.0,285.0,11.0,7.0,5328.0,407.0,848.0,558.0,221.0,260.0,8.0,19.0,16.0,131.0,32.0,128.0,44.0,56.0,26.0,607.0,11.0,9.0,2.0,18.0,3449.0,5631.0,754.0,363.0,49.0,53.0,1387.0,86.0,142.0,50.0,5185.0,797.0,574.0,294.0,5185.0,34.0,383.0,143.0,154.0,208.0,144.0,25.0,172.0,187.0,78.0,38.0,41.0,41.0,5202.0,622.0,630.0,383.0,25.0,5190.0
4,AS Nancy Lorraine,1058.0,356.0,1260.0,160.0,0.0,7.0,232.0,1174.0,254.0,67.0,12.0,791.0,254.0,8.0,1.0,3526.0,239.0,618.0,261.0,172.0,145.0,7.0,11.0,4.0,85.0,15.0,82.0,14.0,28.0,10.0,502.0,17.0,16.0,2.0,4.0,2348.0,3789.0,523.0,193.0,42.0,33.0,874.0,69.0,86.0,29.0,3522.0,651.0,239.0,168.0,3522.0,34.0,197.0,87.0,76.0,110.0,94.0,18.0,133.0,135.0,82.0,27.0,31.0,29.0,3527.0,347.0,495.0,197.0,18.0,3523.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,Villarreal,1849.0,798.0,2045.0,392.0,1.0,5.0,480.0,2085.0,465.0,104.0,24.0,1356.0,465.0,16.0,5.0,6406.0,476.0,1150.0,512.0,235.0,224.0,11.0,35.0,26.0,164.0,34.0,191.0,46.0,64.0,24.0,709.0,9.0,10.0,3.0,11.0,4314.0,6892.0,924.0,295.0,43.0,94.0,1578.0,83.0,146.0,42.0,6399.0,937.0,671.0,241.0,6399.0,61.0,424.0,161.0,151.0,244.0,80.0,29.0,205.0,221.0,118.0,45.0,45.0,50.0,6414.0,671.0,721.0,424.0,29.0,6403.0
138,Watford,678.0,250.0,725.0,129.0,1.0,4.0,164.0,639.0,82.0,16.0,6.0,465.0,82.0,6.0,3.0,2138.0,143.0,328.0,205.0,92.0,76.0,1.0,14.0,6.0,53.0,9.0,49.0,9.0,14.0,9.0,296.0,1.0,8.0,0.0,4.0,1377.0,2229.0,301.0,117.0,42.0,5.0,574.0,46.0,46.0,12.0,2016.0,432.0,149.0,97.0,2016.0,9.0,189.0,45.0,44.0,75.0,70.0,9.0,81.0,65.0,31.0,19.0,15.0,23.0,2019.0,218.0,261.0,189.0,9.0,2017.0
139,Werder Bremen,2318.0,860.0,2645.0,375.0,0.0,10.0,490.0,2714.0,419.0,110.0,34.0,1644.0,419.0,20.0,6.0,7886.0,529.0,1540.0,726.0,296.0,349.0,24.0,26.0,31.0,184.0,50.0,184.0,42.0,70.0,27.0,885.0,34.0,15.0,3.0,17.0,4943.0,8331.0,1082.0,415.0,108.0,39.0,1912.0,149.0,208.0,49.0,7657.0,1302.0,623.0,393.0,7657.0,60.0,550.0,175.0,204.0,251.0,132.0,38.0,301.0,229.0,162.0,51.0,67.0,80.0,7675.0,804.0,921.0,550.0,38.0,7662.0
140,West Brom,1427.0,598.0,1328.0,230.0,0.0,6.0,356.0,1315.0,243.0,24.0,5.0,1041.0,243.0,6.0,5.0,4237.0,291.0,720.0,428.0,134.0,170.0,10.0,14.0,6.0,102.0,28.0,110.0,22.0,76.0,14.0,588.0,7.0,12.0,2.0,8.0,2790.0,4491.0,634.0,305.0,64.0,38.0,1183.0,74.0,145.0,25.0,4105.0,797.0,367.0,263.0,4105.0,29.0,392.0,92.0,94.0,134.0,109.0,12.0,169.0,172.0,105.0,35.0,35.0,44.0,4110.0,420.0,598.0,392.0,12.0,4110.0


In [294]:
# Print columns names
team_aggregated.columns

Index(['event_team', 'event_type_1', 'event_type_2', 'event_type_3',
       'event_type_4', 'event_type_5', 'event_type_6', 'event_type_7',
       'event_type_8', 'event_type_9', 'event_type_10', 'event_type_11',
       'event_type2_12.0', 'event_type2_13.0', 'event_type2_14.0',
       'event_type2_15.0', 'event_type2_nan', 'location_1.0', 'location_2.0',
       'location_3.0', 'location_4.0', 'location_5.0', 'location_6.0',
       'location_7.0', 'location_8.0', 'location_9.0', 'location_10.0',
       'location_11.0', 'location_12.0', 'location_13.0', 'location_14.0',
       'location_15.0', 'location_16.0', 'location_17.0', 'location_18.0',
       'location_19.0', 'location_nan', 'assist_method_0', 'assist_method_1',
       'assist_method_2', 'assist_method_3', 'assist_method_4',
       'situation_1.0', 'situation_2.0', 'situation_3.0', 'situation_4.0',
       'situation_nan', 'bodypart_1.0', 'bodypart_2.0', 'bodypart_3.0',
       'bodypart_nan', 'shot_place_1.0', 'shot_place_2.0', '

In [295]:
# Drop unnecessary features (e.g., "Not recorded")
columns_to_drop = [col for col in team_aggregated.columns if 'nan' in col or 'Not recorded' in col]
team_aggregated.drop(columns=columns_to_drop, inplace=True, errors='ignore')

In [296]:
team_aggregated.columns

Index(['event_team', 'event_type_1', 'event_type_2', 'event_type_3',
       'event_type_4', 'event_type_5', 'event_type_6', 'event_type_7',
       'event_type_8', 'event_type_9', 'event_type_10', 'event_type_11',
       'event_type2_12.0', 'event_type2_13.0', 'event_type2_14.0',
       'event_type2_15.0', 'location_1.0', 'location_2.0', 'location_3.0',
       'location_4.0', 'location_5.0', 'location_6.0', 'location_7.0',
       'location_8.0', 'location_9.0', 'location_10.0', 'location_11.0',
       'location_12.0', 'location_13.0', 'location_14.0', 'location_15.0',
       'location_16.0', 'location_17.0', 'location_18.0', 'location_19.0',
       'assist_method_0', 'assist_method_1', 'assist_method_2',
       'assist_method_3', 'assist_method_4', 'situation_1.0', 'situation_2.0',
       'situation_3.0', 'situation_4.0', 'bodypart_1.0', 'bodypart_2.0',
       'bodypart_3.0', 'shot_place_1.0', 'shot_place_2.0', 'shot_place_3.0',
       'shot_place_4.0', 'shot_place_5.0', 'shot_place_6.0

In [297]:
# Remove location_19.0 also because it is not recorded
team_aggregated.drop(columns='location_19.0', inplace=True, errors='ignore')

In [298]:
team_aggregated.columns

Index(['event_team', 'event_type_1', 'event_type_2', 'event_type_3',
       'event_type_4', 'event_type_5', 'event_type_6', 'event_type_7',
       'event_type_8', 'event_type_9', 'event_type_10', 'event_type_11',
       'event_type2_12.0', 'event_type2_13.0', 'event_type2_14.0',
       'event_type2_15.0', 'location_1.0', 'location_2.0', 'location_3.0',
       'location_4.0', 'location_5.0', 'location_6.0', 'location_7.0',
       'location_8.0', 'location_9.0', 'location_10.0', 'location_11.0',
       'location_12.0', 'location_13.0', 'location_14.0', 'location_15.0',
       'location_16.0', 'location_17.0', 'location_18.0', 'assist_method_0',
       'assist_method_1', 'assist_method_2', 'assist_method_3',
       'assist_method_4', 'situation_1.0', 'situation_2.0', 'situation_3.0',
       'situation_4.0', 'bodypart_1.0', 'bodypart_2.0', 'bodypart_3.0',
       'shot_place_1.0', 'shot_place_2.0', 'shot_place_3.0', 'shot_place_4.0',
       'shot_place_5.0', 'shot_place_6.0', 'shot_place_7.

In [299]:
team_aggregated

Unnamed: 0,event_team,event_type_1,event_type_2,event_type_3,event_type_4,event_type_5,event_type_6,event_type_7,event_type_8,event_type_9,event_type_10,event_type_11,event_type2_12.0,event_type2_13.0,event_type2_14.0,event_type2_15.0,location_1.0,location_2.0,location_3.0,location_4.0,location_5.0,location_6.0,location_7.0,location_8.0,location_9.0,location_10.0,location_11.0,location_12.0,location_13.0,location_14.0,location_15.0,location_16.0,location_17.0,location_18.0,assist_method_0,assist_method_1,assist_method_2,assist_method_3,assist_method_4,situation_1.0,situation_2.0,situation_3.0,situation_4.0,bodypart_1.0,bodypart_2.0,bodypart_3.0,shot_place_1.0,shot_place_2.0,shot_place_3.0,shot_place_4.0,shot_place_5.0,shot_place_6.0,shot_place_7.0,shot_place_8.0,shot_place_9.0,shot_place_10.0,shot_place_11.0,shot_place_12.0,shot_place_13.0,shot_outcome_1.0,shot_outcome_2.0,shot_outcome_3.0,shot_outcome_4.0
0,AC Ajaccio,1106.0,449.0,1593.0,238.0,0.0,12.0,307.0,1534.0,365.0,96.0,15.0,762.0,365.0,17.0,4.0,285.0,850.0,336.0,212.0,187.0,9.0,9.0,10.0,71.0,13.0,92.0,12.0,27.0,15.0,457.0,24.0,20.0,3.0,4953.0,490.0,198.0,43.0,31.0,913.0,86.0,74.0,33.0,566.0,374.0,166.0,19.0,245.0,86.0,80.0,126.0,89.0,21.0,136.0,122.0,90.0,25.0,28.0,30.0,370.0,466.0,245.0,21.0
1,AC Milan,3065.0,1225.0,2677.0,499.0,3.0,18.0,593.0,2840.0,543.0,128.0,42.0,2200.0,543.0,39.0,11.0,673.0,1484.0,784.0,344.0,339.0,10.0,28.0,35.0,215.0,34.0,256.0,29.0,93.0,51.0,1465.0,23.0,19.0,1.0,9433.0,1556.0,477.0,61.0,106.0,2615.0,144.0,219.0,87.0,1936.0,788.0,341.0,69.0,782.0,267.0,255.0,308.0,219.0,48.0,357.0,305.0,224.0,63.0,68.0,72.0,1030.0,1196.0,782.0,48.0
2,AJ Auxerre,446.0,178.0,558.0,89.0,0.0,3.0,104.0,469.0,94.0,35.0,8.0,302.0,94.0,5.0,0.0,108.0,237.0,123.0,70.0,54.0,6.0,4.0,4.0,48.0,10.0,33.0,10.0,18.0,1.0,173.0,5.0,8.0,1.0,1682.0,174.0,92.0,16.0,20.0,364.0,18.0,41.0,23.0,199.0,178.0,69.0,6.0,96.0,28.0,39.0,56.0,36.0,6.0,48.0,47.0,48.0,10.0,8.0,16.0,156.0,188.0,96.0,6.0
3,AS Monaco,1665.0,727.0,1666.0,243.0,0.0,8.0,385.0,1736.0,285.0,106.0,29.0,1219.0,285.0,11.0,7.0,407.0,848.0,558.0,221.0,260.0,8.0,19.0,16.0,131.0,32.0,128.0,44.0,56.0,26.0,607.0,11.0,9.0,2.0,5631.0,754.0,363.0,49.0,53.0,1387.0,86.0,142.0,50.0,797.0,574.0,294.0,34.0,383.0,143.0,154.0,208.0,144.0,25.0,172.0,187.0,78.0,38.0,41.0,41.0,622.0,630.0,383.0,25.0
4,AS Nancy Lorraine,1058.0,356.0,1260.0,160.0,0.0,7.0,232.0,1174.0,254.0,67.0,12.0,791.0,254.0,8.0,1.0,239.0,618.0,261.0,172.0,145.0,7.0,11.0,4.0,85.0,15.0,82.0,14.0,28.0,10.0,502.0,17.0,16.0,2.0,3789.0,523.0,193.0,42.0,33.0,874.0,69.0,86.0,29.0,651.0,239.0,168.0,34.0,197.0,87.0,76.0,110.0,94.0,18.0,133.0,135.0,82.0,27.0,31.0,29.0,347.0,495.0,197.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,Villarreal,1849.0,798.0,2045.0,392.0,1.0,5.0,480.0,2085.0,465.0,104.0,24.0,1356.0,465.0,16.0,5.0,476.0,1150.0,512.0,235.0,224.0,11.0,35.0,26.0,164.0,34.0,191.0,46.0,64.0,24.0,709.0,9.0,10.0,3.0,6892.0,924.0,295.0,43.0,94.0,1578.0,83.0,146.0,42.0,937.0,671.0,241.0,61.0,424.0,161.0,151.0,244.0,80.0,29.0,205.0,221.0,118.0,45.0,45.0,50.0,671.0,721.0,424.0,29.0
138,Watford,678.0,250.0,725.0,129.0,1.0,4.0,164.0,639.0,82.0,16.0,6.0,465.0,82.0,6.0,3.0,143.0,328.0,205.0,92.0,76.0,1.0,14.0,6.0,53.0,9.0,49.0,9.0,14.0,9.0,296.0,1.0,8.0,0.0,2229.0,301.0,117.0,42.0,5.0,574.0,46.0,46.0,12.0,432.0,149.0,97.0,9.0,189.0,45.0,44.0,75.0,70.0,9.0,81.0,65.0,31.0,19.0,15.0,23.0,218.0,261.0,189.0,9.0
139,Werder Bremen,2318.0,860.0,2645.0,375.0,0.0,10.0,490.0,2714.0,419.0,110.0,34.0,1644.0,419.0,20.0,6.0,529.0,1540.0,726.0,296.0,349.0,24.0,26.0,31.0,184.0,50.0,184.0,42.0,70.0,27.0,885.0,34.0,15.0,3.0,8331.0,1082.0,415.0,108.0,39.0,1912.0,149.0,208.0,49.0,1302.0,623.0,393.0,60.0,550.0,175.0,204.0,251.0,132.0,38.0,301.0,229.0,162.0,51.0,67.0,80.0,804.0,921.0,550.0,38.0
140,West Brom,1427.0,598.0,1328.0,230.0,0.0,6.0,356.0,1315.0,243.0,24.0,5.0,1041.0,243.0,6.0,5.0,291.0,720.0,428.0,134.0,170.0,10.0,14.0,6.0,102.0,28.0,110.0,22.0,76.0,14.0,588.0,7.0,12.0,2.0,4491.0,634.0,305.0,64.0,38.0,1183.0,74.0,145.0,25.0,797.0,367.0,263.0,29.0,392.0,92.0,94.0,134.0,109.0,12.0,169.0,172.0,105.0,35.0,35.0,44.0,420.0,598.0,392.0,12.0


In [300]:
# Count games played for each team
# Combine home and away counts
games_played = games_info['ht'].value_counts() + games_info['at'].value_counts()
team_aggregated['games_played'] = team_aggregated['event_team'].map(games_played).fillna(1)  # Avoid division by zero

In [301]:
# Normalize event-related columns by games played
columns_to_normalize = [
    col for col in team_aggregated.columns 
    if col not in ['event_team', 'games_played']
]
for col in columns_to_normalize:
    team_aggregated[col] = team_aggregated[col] / team_aggregated['games_played']

In [302]:
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.expand_frame_repr', False)  # Disable wrapping to new line
team_aggregated

Unnamed: 0,event_team,event_type_1,event_type_2,event_type_3,event_type_4,event_type_5,event_type_6,event_type_7,event_type_8,event_type_9,event_type_10,event_type_11,event_type2_12.0,event_type2_13.0,event_type2_14.0,event_type2_15.0,location_1.0,location_2.0,location_3.0,location_4.0,location_5.0,location_6.0,location_7.0,location_8.0,location_9.0,location_10.0,location_11.0,location_12.0,location_13.0,location_14.0,location_15.0,location_16.0,location_17.0,location_18.0,assist_method_0,assist_method_1,assist_method_2,assist_method_3,assist_method_4,situation_1.0,situation_2.0,situation_3.0,situation_4.0,bodypart_1.0,bodypart_2.0,bodypart_3.0,shot_place_1.0,shot_place_2.0,shot_place_3.0,shot_place_4.0,shot_place_5.0,shot_place_6.0,shot_place_7.0,shot_place_8.0,shot_place_9.0,shot_place_10.0,shot_place_11.0,shot_place_12.0,shot_place_13.0,shot_outcome_1.0,shot_outcome_2.0,shot_outcome_3.0,shot_outcome_4.0,games_played
0,AC Ajaccio,9.701754,3.938596,13.973684,2.087719,0.000000,0.105263,2.692982,13.456140,3.201754,0.842105,0.131579,6.684211,3.201754,0.149123,0.035088,2.500000,7.456140,2.947368,1.859649,1.640351,0.078947,0.078947,0.087719,0.622807,0.114035,0.807018,0.105263,0.236842,0.131579,4.008772,0.210526,0.175439,0.026316,43.447368,4.298246,1.736842,0.377193,0.271930,8.008772,0.754386,0.649123,0.289474,4.964912,3.280702,1.456140,0.166667,2.149123,0.754386,0.701754,1.105263,0.780702,0.184211,1.192982,1.070175,0.789474,0.219298,0.245614,0.263158,3.245614,4.087719,2.149123,0.184211,114.0
1,AC Milan,14.595238,5.833333,12.747619,2.376190,0.014286,0.085714,2.823810,13.523810,2.585714,0.609524,0.200000,10.476190,2.585714,0.185714,0.052381,3.204762,7.066667,3.733333,1.638095,1.614286,0.047619,0.133333,0.166667,1.023810,0.161905,1.219048,0.138095,0.442857,0.242857,6.976190,0.109524,0.090476,0.004762,44.919048,7.409524,2.271429,0.290476,0.504762,12.452381,0.685714,1.042857,0.414286,9.219048,3.752381,1.623810,0.328571,3.723810,1.271429,1.214286,1.466667,1.042857,0.228571,1.700000,1.452381,1.066667,0.300000,0.323810,0.342857,4.904762,5.695238,3.723810,0.228571,210.0
2,AJ Auxerre,11.736842,4.684211,14.684211,2.342105,0.000000,0.078947,2.736842,12.342105,2.473684,0.921053,0.210526,7.947368,2.473684,0.131579,0.000000,2.842105,6.236842,3.236842,1.842105,1.421053,0.157895,0.105263,0.105263,1.263158,0.263158,0.868421,0.263158,0.473684,0.026316,4.552632,0.131579,0.210526,0.026316,44.263158,4.578947,2.421053,0.421053,0.526316,9.578947,0.473684,1.078947,0.605263,5.236842,4.684211,1.815789,0.157895,2.526316,0.736842,1.026316,1.473684,0.947368,0.157895,1.263158,1.236842,1.263158,0.263158,0.210526,0.421053,4.105263,4.947368,2.526316,0.157895,38.0
3,AS Monaco,12.333333,5.385185,12.340741,1.800000,0.000000,0.059259,2.851852,12.859259,2.111111,0.785185,0.214815,9.029630,2.111111,0.081481,0.051852,3.014815,6.281481,4.133333,1.637037,1.925926,0.059259,0.140741,0.118519,0.970370,0.237037,0.948148,0.325926,0.414815,0.192593,4.496296,0.081481,0.066667,0.014815,41.711111,5.585185,2.688889,0.362963,0.392593,10.274074,0.637037,1.051852,0.370370,5.903704,4.251852,2.177778,0.251852,2.837037,1.059259,1.140741,1.540741,1.066667,0.185185,1.274074,1.385185,0.577778,0.281481,0.303704,0.303704,4.607407,4.666667,2.837037,0.185185,135.0
4,AS Nancy Lorraine,11.020833,3.708333,13.125000,1.666667,0.000000,0.072917,2.416667,12.229167,2.645833,0.697917,0.125000,8.239583,2.645833,0.083333,0.010417,2.489583,6.437500,2.718750,1.791667,1.510417,0.072917,0.114583,0.041667,0.885417,0.156250,0.854167,0.145833,0.291667,0.104167,5.229167,0.177083,0.166667,0.020833,39.468750,5.447917,2.010417,0.437500,0.343750,9.104167,0.718750,0.895833,0.302083,6.781250,2.489583,1.750000,0.354167,2.052083,0.906250,0.791667,1.145833,0.979167,0.187500,1.385417,1.406250,0.854167,0.281250,0.322917,0.302083,3.614583,5.156250,2.052083,0.187500,96.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,Villarreal,10.812865,4.666667,11.959064,2.292398,0.005848,0.029240,2.807018,12.192982,2.719298,0.608187,0.140351,7.929825,2.719298,0.093567,0.029240,2.783626,6.725146,2.994152,1.374269,1.309942,0.064327,0.204678,0.152047,0.959064,0.198830,1.116959,0.269006,0.374269,0.140351,4.146199,0.052632,0.058480,0.017544,40.304094,5.403509,1.725146,0.251462,0.549708,9.228070,0.485380,0.853801,0.245614,5.479532,3.923977,1.409357,0.356725,2.479532,0.941520,0.883041,1.426901,0.467836,0.169591,1.198830,1.292398,0.690058,0.263158,0.263158,0.292398,3.923977,4.216374,2.479532,0.169591,171.0
138,Watford,11.300000,4.166667,12.083333,2.150000,0.016667,0.066667,2.733333,10.650000,1.366667,0.266667,0.100000,7.750000,1.366667,0.100000,0.050000,2.383333,5.466667,3.416667,1.533333,1.266667,0.016667,0.233333,0.100000,0.883333,0.150000,0.816667,0.150000,0.233333,0.150000,4.933333,0.016667,0.133333,0.000000,37.150000,5.016667,1.950000,0.700000,0.083333,9.566667,0.766667,0.766667,0.200000,7.200000,2.483333,1.616667,0.150000,3.150000,0.750000,0.733333,1.250000,1.166667,0.150000,1.350000,1.083333,0.516667,0.316667,0.250000,0.383333,3.633333,4.350000,3.150000,0.150000,60.0
139,Werder Bremen,12.395722,4.598930,14.144385,2.005348,0.000000,0.053476,2.620321,14.513369,2.240642,0.588235,0.181818,8.791444,2.240642,0.106952,0.032086,2.828877,8.235294,3.882353,1.582888,1.866310,0.128342,0.139037,0.165775,0.983957,0.267380,0.983957,0.224599,0.374332,0.144385,4.732620,0.181818,0.080214,0.016043,44.550802,5.786096,2.219251,0.577540,0.208556,10.224599,0.796791,1.112299,0.262032,6.962567,3.331551,2.101604,0.320856,2.941176,0.935829,1.090909,1.342246,0.705882,0.203209,1.609626,1.224599,0.866310,0.272727,0.358289,0.427807,4.299465,4.925134,2.941176,0.203209,187.0
140,West Brom,6.731132,2.820755,6.264151,1.084906,0.000000,0.028302,1.679245,6.202830,1.146226,0.113208,0.023585,4.910377,1.146226,0.028302,0.023585,1.372642,3.396226,2.018868,0.632075,0.801887,0.047170,0.066038,0.028302,0.481132,0.132075,0.518868,0.103774,0.358491,0.066038,2.773585,0.033019,0.056604,0.009434,21.183962,2.990566,1.438679,0.301887,0.179245,5.580189,0.349057,0.683962,0.117925,3.759434,1.731132,1.240566,0.136792,1.849057,0.433962,0.443396,0.632075,0.514151,0.056604,0.797170,0.811321,0.495283,0.165094,0.165094,0.207547,1.981132,2.820755,1.849057,0.056604,212.0


In [303]:
# 11. Create additional aggregate statistics / ratios
#     Example: total attempts, total fouls, total red cards, etc.

# Identify relevant sets of columns 
event_type_cols = [col for col in columns_to_normalize if col.startswith('event_type_')]
shot_outcome_cols = [col for col in columns_to_normalize if col.startswith('shot_outcome_')]
shot_place_cols = [col for col in columns_to_normalize if col.startswith('shot_place_')]
card_cols = [c for c in event_type_cols if any(x in c for x in ['event_type_4', 'event_type_5', 'event_type_6'])]
foul_cols = [c for c in event_type_cols if 'event_type_3' in c]
situation_cols = [c for c in event_type_cols if 'situation' in c]

In [304]:
card_cols

['event_type_4', 'event_type_5', 'event_type_6']

In [305]:
# Calculate ratios and percentages
team_aggregated['total_events'] = team_aggregated[event_type_cols].sum(axis=1)
team_aggregated['shots_on_target_ratio'] = team_aggregated.get('shot_outcome_1.0', 0) / (team_aggregated['total_events'] + 1e-6)
team_aggregated['cards_per_foul'] = (team_aggregated.get('event_type_4', 0) + team_aggregated.get('event_type_5', 0)) / (team_aggregated.get('event_type_3', 0) + 1e-6)

In [306]:
team_aggregated

Unnamed: 0,event_team,event_type_1,event_type_2,event_type_3,event_type_4,event_type_5,event_type_6,event_type_7,event_type_8,event_type_9,event_type_10,event_type_11,event_type2_12.0,event_type2_13.0,event_type2_14.0,event_type2_15.0,location_1.0,location_2.0,location_3.0,location_4.0,location_5.0,location_6.0,location_7.0,location_8.0,location_9.0,location_10.0,location_11.0,location_12.0,location_13.0,location_14.0,location_15.0,location_16.0,location_17.0,location_18.0,assist_method_0,assist_method_1,assist_method_2,assist_method_3,assist_method_4,situation_1.0,situation_2.0,situation_3.0,situation_4.0,bodypart_1.0,bodypart_2.0,bodypart_3.0,shot_place_1.0,shot_place_2.0,shot_place_3.0,shot_place_4.0,shot_place_5.0,shot_place_6.0,shot_place_7.0,shot_place_8.0,shot_place_9.0,shot_place_10.0,shot_place_11.0,shot_place_12.0,shot_place_13.0,shot_outcome_1.0,shot_outcome_2.0,shot_outcome_3.0,shot_outcome_4.0,games_played,total_events,shots_on_target_ratio,cards_per_foul
0,AC Ajaccio,9.701754,3.938596,13.973684,2.087719,0.000000,0.105263,2.692982,13.456140,3.201754,0.842105,0.131579,6.684211,3.201754,0.149123,0.035088,2.500000,7.456140,2.947368,1.859649,1.640351,0.078947,0.078947,0.087719,0.622807,0.114035,0.807018,0.105263,0.236842,0.131579,4.008772,0.210526,0.175439,0.026316,43.447368,4.298246,1.736842,0.377193,0.271930,8.008772,0.754386,0.649123,0.289474,4.964912,3.280702,1.456140,0.166667,2.149123,0.754386,0.701754,1.105263,0.780702,0.184211,1.192982,1.070175,0.789474,0.219298,0.245614,0.263158,3.245614,4.087719,2.149123,0.184211,114.0,50.131579,0.064742,0.149404
1,AC Milan,14.595238,5.833333,12.747619,2.376190,0.014286,0.085714,2.823810,13.523810,2.585714,0.609524,0.200000,10.476190,2.585714,0.185714,0.052381,3.204762,7.066667,3.733333,1.638095,1.614286,0.047619,0.133333,0.166667,1.023810,0.161905,1.219048,0.138095,0.442857,0.242857,6.976190,0.109524,0.090476,0.004762,44.919048,7.409524,2.271429,0.290476,0.504762,12.452381,0.685714,1.042857,0.414286,9.219048,3.752381,1.623810,0.328571,3.723810,1.271429,1.214286,1.466667,1.042857,0.228571,1.700000,1.452381,1.066667,0.300000,0.323810,0.342857,4.904762,5.695238,3.723810,0.228571,210.0,55.395238,0.088541,0.187523
2,AJ Auxerre,11.736842,4.684211,14.684211,2.342105,0.000000,0.078947,2.736842,12.342105,2.473684,0.921053,0.210526,7.947368,2.473684,0.131579,0.000000,2.842105,6.236842,3.236842,1.842105,1.421053,0.157895,0.105263,0.105263,1.263158,0.263158,0.868421,0.263158,0.473684,0.026316,4.552632,0.131579,0.210526,0.026316,44.263158,4.578947,2.421053,0.421053,0.526316,9.578947,0.473684,1.078947,0.605263,5.236842,4.684211,1.815789,0.157895,2.526316,0.736842,1.026316,1.473684,0.947368,0.157895,1.263158,1.236842,1.263158,0.263158,0.210526,0.421053,4.105263,4.947368,2.526316,0.157895,38.0,52.210526,0.078629,0.159498
3,AS Monaco,12.333333,5.385185,12.340741,1.800000,0.000000,0.059259,2.851852,12.859259,2.111111,0.785185,0.214815,9.029630,2.111111,0.081481,0.051852,3.014815,6.281481,4.133333,1.637037,1.925926,0.059259,0.140741,0.118519,0.970370,0.237037,0.948148,0.325926,0.414815,0.192593,4.496296,0.081481,0.066667,0.014815,41.711111,5.585185,2.688889,0.362963,0.392593,10.274074,0.637037,1.051852,0.370370,5.903704,4.251852,2.177778,0.251852,2.837037,1.059259,1.140741,1.540741,1.066667,0.185185,1.274074,1.385185,0.577778,0.281481,0.303704,0.303704,4.607407,4.666667,2.837037,0.185185,135.0,50.740741,0.090803,0.145858
4,AS Nancy Lorraine,11.020833,3.708333,13.125000,1.666667,0.000000,0.072917,2.416667,12.229167,2.645833,0.697917,0.125000,8.239583,2.645833,0.083333,0.010417,2.489583,6.437500,2.718750,1.791667,1.510417,0.072917,0.114583,0.041667,0.885417,0.156250,0.854167,0.145833,0.291667,0.104167,5.229167,0.177083,0.166667,0.020833,39.468750,5.447917,2.010417,0.437500,0.343750,9.104167,0.718750,0.895833,0.302083,6.781250,2.489583,1.750000,0.354167,2.052083,0.906250,0.791667,1.145833,0.979167,0.187500,1.385417,1.406250,0.854167,0.281250,0.322917,0.302083,3.614583,5.156250,2.052083,0.187500,96.0,47.708333,0.075764,0.126984
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,Villarreal,10.812865,4.666667,11.959064,2.292398,0.005848,0.029240,2.807018,12.192982,2.719298,0.608187,0.140351,7.929825,2.719298,0.093567,0.029240,2.783626,6.725146,2.994152,1.374269,1.309942,0.064327,0.204678,0.152047,0.959064,0.198830,1.116959,0.269006,0.374269,0.140351,4.146199,0.052632,0.058480,0.017544,40.304094,5.403509,1.725146,0.251462,0.549708,9.228070,0.485380,0.853801,0.245614,5.479532,3.923977,1.409357,0.356725,2.479532,0.941520,0.883041,1.426901,0.467836,0.169591,1.198830,1.292398,0.690058,0.263158,0.263158,0.292398,3.923977,4.216374,2.479532,0.169591,171.0,48.233918,0.081353,0.192176
138,Watford,11.300000,4.166667,12.083333,2.150000,0.016667,0.066667,2.733333,10.650000,1.366667,0.266667,0.100000,7.750000,1.366667,0.100000,0.050000,2.383333,5.466667,3.416667,1.533333,1.266667,0.016667,0.233333,0.100000,0.883333,0.150000,0.816667,0.150000,0.233333,0.150000,4.933333,0.016667,0.133333,0.000000,37.150000,5.016667,1.950000,0.700000,0.083333,9.566667,0.766667,0.766667,0.200000,7.200000,2.483333,1.616667,0.150000,3.150000,0.750000,0.733333,1.250000,1.166667,0.150000,1.350000,1.083333,0.516667,0.316667,0.250000,0.383333,3.633333,4.350000,3.150000,0.150000,60.0,44.900000,0.080921,0.179310
139,Werder Bremen,12.395722,4.598930,14.144385,2.005348,0.000000,0.053476,2.620321,14.513369,2.240642,0.588235,0.181818,8.791444,2.240642,0.106952,0.032086,2.828877,8.235294,3.882353,1.582888,1.866310,0.128342,0.139037,0.165775,0.983957,0.267380,0.983957,0.224599,0.374332,0.144385,4.732620,0.181818,0.080214,0.016043,44.550802,5.786096,2.219251,0.577540,0.208556,10.224599,0.796791,1.112299,0.262032,6.962567,3.331551,2.101604,0.320856,2.941176,0.935829,1.090909,1.342246,0.705882,0.203209,1.609626,1.224599,0.866310,0.272727,0.358289,0.427807,4.299465,4.925134,2.941176,0.203209,187.0,53.342246,0.080602,0.141777
140,West Brom,6.731132,2.820755,6.264151,1.084906,0.000000,0.028302,1.679245,6.202830,1.146226,0.113208,0.023585,4.910377,1.146226,0.028302,0.023585,1.372642,3.396226,2.018868,0.632075,0.801887,0.047170,0.066038,0.028302,0.481132,0.132075,0.518868,0.103774,0.358491,0.066038,2.773585,0.033019,0.056604,0.009434,21.183962,2.990566,1.438679,0.301887,0.179245,5.580189,0.349057,0.683962,0.117925,3.759434,1.731132,1.240566,0.136792,1.849057,0.433962,0.443396,0.632075,0.514151,0.056604,0.797170,0.811321,0.495283,0.165094,0.165094,0.207547,1.981132,2.820755,1.849057,0.056604,212.0,26.094340,0.075922,0.173193


In [307]:
# 5. Generate interaction terms
interaction_pairs = [
    (event_type_cols, shot_outcome_cols),  # Event Type x Shot Outcome
    (shot_outcome_cols, shot_place_cols),  # Shot Outcome x Shot Place
    (event_type_cols, situation_cols),  # Event Type x Situation
]

In [308]:
for category1, category2 in interaction_pairs:
    for col1, col2 in product(category1, category2):
        team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]


  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregated[col1] * team_aggregated[col2]
  team_aggregated[f'{col1}_x_{col2}'] = team_aggregate

In [309]:
# Drop games_played (not needed for clustering features)
team_aggregated.drop(columns=['games_played'], inplace=True)

In [310]:
team_aggregated.columns

Index(['event_team', 'event_type_1', 'event_type_2', 'event_type_3',
       'event_type_4', 'event_type_5', 'event_type_6', 'event_type_7',
       'event_type_8', 'event_type_9',
       ...
       'shot_outcome_4.0_x_shot_place_4.0',
       'shot_outcome_4.0_x_shot_place_5.0',
       'shot_outcome_4.0_x_shot_place_6.0',
       'shot_outcome_4.0_x_shot_place_7.0',
       'shot_outcome_4.0_x_shot_place_8.0',
       'shot_outcome_4.0_x_shot_place_9.0',
       'shot_outcome_4.0_x_shot_place_10.0',
       'shot_outcome_4.0_x_shot_place_11.0',
       'shot_outcome_4.0_x_shot_place_12.0',
       'shot_outcome_4.0_x_shot_place_13.0'],
      dtype='object', length=162)

In [311]:
team_aggregated

Unnamed: 0,event_team,event_type_1,event_type_2,event_type_3,event_type_4,event_type_5,event_type_6,event_type_7,event_type_8,event_type_9,event_type_10,event_type_11,event_type2_12.0,event_type2_13.0,event_type2_14.0,event_type2_15.0,location_1.0,location_2.0,location_3.0,location_4.0,location_5.0,location_6.0,location_7.0,location_8.0,location_9.0,location_10.0,location_11.0,location_12.0,location_13.0,location_14.0,location_15.0,location_16.0,location_17.0,location_18.0,assist_method_0,assist_method_1,assist_method_2,assist_method_3,assist_method_4,situation_1.0,situation_2.0,situation_3.0,situation_4.0,bodypart_1.0,bodypart_2.0,bodypart_3.0,shot_place_1.0,shot_place_2.0,shot_place_3.0,shot_place_4.0,shot_place_5.0,shot_place_6.0,shot_place_7.0,shot_place_8.0,shot_place_9.0,shot_place_10.0,shot_place_11.0,shot_place_12.0,shot_place_13.0,shot_outcome_1.0,shot_outcome_2.0,shot_outcome_3.0,shot_outcome_4.0,total_events,shots_on_target_ratio,cards_per_foul,event_type_1_x_shot_outcome_1.0,event_type_1_x_shot_outcome_2.0,event_type_1_x_shot_outcome_3.0,event_type_1_x_shot_outcome_4.0,event_type_2_x_shot_outcome_1.0,event_type_2_x_shot_outcome_2.0,event_type_2_x_shot_outcome_3.0,event_type_2_x_shot_outcome_4.0,event_type_3_x_shot_outcome_1.0,event_type_3_x_shot_outcome_2.0,event_type_3_x_shot_outcome_3.0,event_type_3_x_shot_outcome_4.0,event_type_4_x_shot_outcome_1.0,event_type_4_x_shot_outcome_2.0,event_type_4_x_shot_outcome_3.0,event_type_4_x_shot_outcome_4.0,event_type_5_x_shot_outcome_1.0,event_type_5_x_shot_outcome_2.0,event_type_5_x_shot_outcome_3.0,event_type_5_x_shot_outcome_4.0,event_type_6_x_shot_outcome_1.0,event_type_6_x_shot_outcome_2.0,event_type_6_x_shot_outcome_3.0,event_type_6_x_shot_outcome_4.0,event_type_7_x_shot_outcome_1.0,event_type_7_x_shot_outcome_2.0,event_type_7_x_shot_outcome_3.0,event_type_7_x_shot_outcome_4.0,event_type_8_x_shot_outcome_1.0,event_type_8_x_shot_outcome_2.0,event_type_8_x_shot_outcome_3.0,event_type_8_x_shot_outcome_4.0,event_type_9_x_shot_outcome_1.0,event_type_9_x_shot_outcome_2.0,event_type_9_x_shot_outcome_3.0,event_type_9_x_shot_outcome_4.0,event_type_10_x_shot_outcome_1.0,event_type_10_x_shot_outcome_2.0,event_type_10_x_shot_outcome_3.0,event_type_10_x_shot_outcome_4.0,event_type_11_x_shot_outcome_1.0,event_type_11_x_shot_outcome_2.0,event_type_11_x_shot_outcome_3.0,event_type_11_x_shot_outcome_4.0,shot_outcome_1.0_x_shot_place_1.0,shot_outcome_1.0_x_shot_place_2.0,shot_outcome_1.0_x_shot_place_3.0,shot_outcome_1.0_x_shot_place_4.0,shot_outcome_1.0_x_shot_place_5.0,shot_outcome_1.0_x_shot_place_6.0,shot_outcome_1.0_x_shot_place_7.0,shot_outcome_1.0_x_shot_place_8.0,shot_outcome_1.0_x_shot_place_9.0,shot_outcome_1.0_x_shot_place_10.0,shot_outcome_1.0_x_shot_place_11.0,shot_outcome_1.0_x_shot_place_12.0,shot_outcome_1.0_x_shot_place_13.0,shot_outcome_2.0_x_shot_place_1.0,shot_outcome_2.0_x_shot_place_2.0,shot_outcome_2.0_x_shot_place_3.0,shot_outcome_2.0_x_shot_place_4.0,shot_outcome_2.0_x_shot_place_5.0,shot_outcome_2.0_x_shot_place_6.0,shot_outcome_2.0_x_shot_place_7.0,shot_outcome_2.0_x_shot_place_8.0,shot_outcome_2.0_x_shot_place_9.0,shot_outcome_2.0_x_shot_place_10.0,shot_outcome_2.0_x_shot_place_11.0,shot_outcome_2.0_x_shot_place_12.0,shot_outcome_2.0_x_shot_place_13.0,shot_outcome_3.0_x_shot_place_1.0,shot_outcome_3.0_x_shot_place_2.0,shot_outcome_3.0_x_shot_place_3.0,shot_outcome_3.0_x_shot_place_4.0,shot_outcome_3.0_x_shot_place_5.0,shot_outcome_3.0_x_shot_place_6.0,shot_outcome_3.0_x_shot_place_7.0,shot_outcome_3.0_x_shot_place_8.0,shot_outcome_3.0_x_shot_place_9.0,shot_outcome_3.0_x_shot_place_10.0,shot_outcome_3.0_x_shot_place_11.0,shot_outcome_3.0_x_shot_place_12.0,shot_outcome_3.0_x_shot_place_13.0,shot_outcome_4.0_x_shot_place_1.0,shot_outcome_4.0_x_shot_place_2.0,shot_outcome_4.0_x_shot_place_3.0,shot_outcome_4.0_x_shot_place_4.0,shot_outcome_4.0_x_shot_place_5.0,shot_outcome_4.0_x_shot_place_6.0,shot_outcome_4.0_x_shot_place_7.0,shot_outcome_4.0_x_shot_place_8.0,shot_outcome_4.0_x_shot_place_9.0,shot_outcome_4.0_x_shot_place_10.0,shot_outcome_4.0_x_shot_place_11.0,shot_outcome_4.0_x_shot_place_12.0,shot_outcome_4.0_x_shot_place_13.0
0,AC Ajaccio,9.701754,3.938596,13.973684,2.087719,0.000000,0.105263,2.692982,13.456140,3.201754,0.842105,0.131579,6.684211,3.201754,0.149123,0.035088,2.500000,7.456140,2.947368,1.859649,1.640351,0.078947,0.078947,0.087719,0.622807,0.114035,0.807018,0.105263,0.236842,0.131579,4.008772,0.210526,0.175439,0.026316,43.447368,4.298246,1.736842,0.377193,0.271930,8.008772,0.754386,0.649123,0.289474,4.964912,3.280702,1.456140,0.166667,2.149123,0.754386,0.701754,1.105263,0.780702,0.184211,1.192982,1.070175,0.789474,0.219298,0.245614,0.263158,3.245614,4.087719,2.149123,0.184211,50.131579,0.064742,0.149404,31.488150,39.658049,20.850262,1.787165,12.783164,16.099877,8.464528,0.725531,45.353186,57.120499,30.031163,2.574100,6.775931,8.534010,4.486765,0.384580,0.000000,0.000000,0.000000,0.000000,0.341644,0.430286,0.226223,0.019391,8.740382,11.008156,5.787550,0.496076,43.673438,55.004925,28.918898,2.478763,10.391659,13.087873,6.880963,0.589797,2.733149,3.442290,1.809788,0.155125,0.427054,0.537858,0.282779,0.024238,0.540936,6.975223,2.448446,2.277624,3.587258,2.533857,0.597876,3.871961,3.473376,2.562327,0.711757,0.797168,0.854109,0.681287,8.785011,3.083718,2.868575,4.518006,3.191290,0.753001,4.876577,4.374577,3.227147,0.896430,1.004001,1.075716,0.358187,4.618729,1.621268,1.508156,2.375346,1.677824,0.395891,2.563866,2.299938,1.696676,0.471299,0.527855,0.565559,0.030702,0.395891,0.138966,0.129271,0.203601,0.143813,0.033934,0.219760,0.197138,0.145429,0.040397,0.045245,0.048476
1,AC Milan,14.595238,5.833333,12.747619,2.376190,0.014286,0.085714,2.823810,13.523810,2.585714,0.609524,0.200000,10.476190,2.585714,0.185714,0.052381,3.204762,7.066667,3.733333,1.638095,1.614286,0.047619,0.133333,0.166667,1.023810,0.161905,1.219048,0.138095,0.442857,0.242857,6.976190,0.109524,0.090476,0.004762,44.919048,7.409524,2.271429,0.290476,0.504762,12.452381,0.685714,1.042857,0.414286,9.219048,3.752381,1.623810,0.328571,3.723810,1.271429,1.214286,1.466667,1.042857,0.228571,1.700000,1.452381,1.066667,0.300000,0.323810,0.342857,4.904762,5.695238,3.723810,0.228571,55.395238,0.088541,0.187523,71.586168,83.123356,54.349887,3.336054,28.611111,33.222222,21.722222,1.333333,62.524036,72.600726,47.469705,2.913741,11.654649,13.532971,8.848481,0.543129,0.070068,0.081361,0.053197,0.003265,0.420408,0.488163,0.319184,0.019592,13.850113,16.082268,10.515329,0.645442,66.331066,77.021315,50.360091,3.091156,12.682313,14.726259,9.628707,0.591020,2.989569,3.471383,2.269751,0.139320,0.980952,1.139048,0.744762,0.045714,1.611565,18.264399,6.236054,5.955782,7.193651,5.114966,1.121088,8.338095,7.123583,5.231746,1.471429,1.588209,1.681633,1.871293,21.207982,7.241088,6.915646,8.353016,5.939320,1.301769,9.681905,8.271655,6.074921,1.708571,1.844172,1.952653,1.223537,13.866757,4.734558,4.521769,5.461587,3.883401,0.851156,6.330476,5.408390,3.972063,1.117143,1.205805,1.276735,0.075102,0.851156,0.290612,0.277551,0.335238,0.238367,0.052245,0.388571,0.331973,0.243810,0.068571,0.074014,0.078367
2,AJ Auxerre,11.736842,4.684211,14.684211,2.342105,0.000000,0.078947,2.736842,12.342105,2.473684,0.921053,0.210526,7.947368,2.473684,0.131579,0.000000,2.842105,6.236842,3.236842,1.842105,1.421053,0.157895,0.105263,0.105263,1.263158,0.263158,0.868421,0.263158,0.473684,0.026316,4.552632,0.131579,0.210526,0.026316,44.263158,4.578947,2.421053,0.421053,0.526316,9.578947,0.473684,1.078947,0.605263,5.236842,4.684211,1.815789,0.157895,2.526316,0.736842,1.026316,1.473684,0.947368,0.157895,1.263158,1.236842,1.263158,0.263158,0.210526,0.421053,4.105263,4.947368,2.526316,0.157895,52.210526,0.078629,0.159498,48.182825,58.066482,29.650970,1.853186,19.229917,23.174515,11.833795,0.739612,60.282548,72.648199,37.096953,2.318560,9.614958,11.587258,5.916898,0.369806,0.000000,0.000000,0.000000,0.000000,0.324100,0.390582,0.199446,0.012465,11.235457,13.540166,6.914127,0.432133,50.667590,61.060942,31.180055,1.948753,10.155125,12.238227,6.249307,0.390582,3.781163,4.556787,2.326870,0.145429,0.864266,1.041551,0.531856,0.033241,0.648199,10.371191,3.024931,4.213296,6.049861,3.889197,0.648199,5.185596,5.077562,5.185596,1.080332,0.864266,1.728532,0.781163,12.498615,3.645429,5.077562,7.290859,4.686981,0.781163,6.249307,6.119114,6.249307,1.301939,1.041551,2.083102,0.398892,6.382271,1.861496,2.592798,3.722992,2.393352,0.398892,3.191136,3.124654,3.191136,0.664820,0.531856,1.063712,0.024931,0.398892,0.116343,0.162050,0.232687,0.149584,0.024931,0.199446,0.195291,0.199446,0.041551,0.033241,0.066482
3,AS Monaco,12.333333,5.385185,12.340741,1.800000,0.000000,0.059259,2.851852,12.859259,2.111111,0.785185,0.214815,9.029630,2.111111,0.081481,0.051852,3.014815,6.281481,4.133333,1.637037,1.925926,0.059259,0.140741,0.118519,0.970370,0.237037,0.948148,0.325926,0.414815,0.192593,4.496296,0.081481,0.066667,0.014815,41.711111,5.585185,2.688889,0.362963,0.392593,10.274074,0.637037,1.051852,0.370370,5.903704,4.251852,2.177778,0.251852,2.837037,1.059259,1.140741,1.540741,1.066667,0.185185,1.274074,1.385185,0.577778,0.281481,0.303704,0.303704,4.607407,4.666667,2.837037,0.185185,50.740741,0.090803,0.145858,56.824691,57.555556,34.990123,2.283951,24.811742,25.130864,15.277970,0.997257,56.858820,57.590123,35.011139,2.285322,8.293333,8.400000,5.106667,0.333333,0.000000,0.000000,0.000000,0.000000,0.273032,0.276543,0.168121,0.010974,13.139643,13.308642,8.090809,0.528121,59.247846,60.009877,36.482195,2.381344,9.726749,9.851852,5.989300,0.390947,3.617668,3.664198,2.227599,0.145405,0.989739,1.002469,0.609438,0.039781,1.160384,13.071385,4.880439,5.255857,7.098820,4.914568,0.853224,5.870178,6.382112,2.662058,1.296900,1.399287,1.399287,1.175309,13.239506,4.943210,5.323457,7.190123,4.977778,0.864198,5.945679,6.464198,2.696296,1.313580,1.417284,1.417284,0.714513,8.048779,3.005158,3.236324,4.371139,3.026173,0.525377,3.614595,3.929822,1.639177,0.798573,0.861619,0.861619,0.046639,0.525377,0.196159,0.211248,0.285322,0.197531,0.034294,0.235940,0.256516,0.106996,0.052126,0.056241,0.056241
4,AS Nancy Lorraine,11.020833,3.708333,13.125000,1.666667,0.000000,0.072917,2.416667,12.229167,2.645833,0.697917,0.125000,8.239583,2.645833,0.083333,0.010417,2.489583,6.437500,2.718750,1.791667,1.510417,0.072917,0.114583,0.041667,0.885417,0.156250,0.854167,0.145833,0.291667,0.104167,5.229167,0.177083,0.166667,0.020833,39.468750,5.447917,2.010417,0.437500,0.343750,9.104167,0.718750,0.895833,0.302083,6.781250,2.489583,1.750000,0.354167,2.052083,0.906250,0.791667,1.145833,0.979167,0.187500,1.385417,1.406250,0.854167,0.281250,0.322917,0.302083,3.614583,5.156250,2.052083,0.187500,47.708333,0.075764,0.126984,39.835720,56.826172,22.615668,2.066406,13.404080,19.121094,7.609809,0.695312,47.441406,67.675781,26.933594,2.460938,6.024306,8.593750,3.420139,0.312500,0.000000,0.000000,0.000000,0.000000,0.263563,0.375977,0.149631,0.013672,8.735243,12.460938,4.959201,0.453125,44.203342,63.056641,25.095269,2.292969,9.563585,13.642578,5.429470,0.496094,2.522678,3.598633,1.432183,0.130859,0.451823,0.644531,0.256510,0.023438,1.280165,7.417426,3.275716,2.861545,4.141710,3.539280,0.677734,5.007704,5.083008,3.087457,1.016602,1.167209,1.091905,1.826172,10.581055,4.672852,4.082031,5.908203,5.048828,0.966797,7.143555,7.250977,4.404297,1.450195,1.665039,1.557617,0.726780,4.211046,1.859701,1.624566,2.351345,2.009332,0.384766,2.842990,2.885742,1.752821,0.577148,0.662652,0.619900,0.066406,0.384766,0.169922,0.148438,0.214844,0.183594,0.035156,0.259766,0.263672,0.160156,0.052734,0.060547,0.056641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,Villarreal,10.812865,4.666667,11.959064,2.292398,0.005848,0.029240,2.807018,12.192982,2.719298,0.608187,0.140351,7.929825,2.719298,0.093567,0.029240,2.783626,6.725146,2.994152,1.374269,1.309942,0.064327,0.204678,0.152047,0.959064,0.198830,1.116959,0.269006,0.374269,0.140351,4.146199,0.052632,0.058480,0.017544,40.304094,5.403509,1.725146,0.251462,0.549708,9.228070,0.485380,0.853801,0.245614,5.479532,3.923977,1.409357,0.356725,2.479532,0.941520,0.883041,1.426901,0.467836,0.169591,1.198830,1.292398,0.690058,0.263158,0.263158,0.292398,3.923977,4.216374,2.479532,0.169591,48.233918,0.081353,0.192176,42.429431,45.591088,26.810848,1.833761,18.311891,19.676413,11.571150,0.791423,46.927089,50.423891,29.652885,2.028145,8.995315,9.665607,5.684074,0.388769,0.022947,0.024657,0.014500,0.000992,0.114736,0.123286,0.072501,0.004959,11.014671,11.835437,6.960090,0.476044,47.844978,51.410177,30.232892,2.067816,10.670463,11.465579,6.742587,0.461168,2.386512,2.564345,1.508020,0.103143,0.550734,0.591772,0.348005,0.023802,1.399781,9.729626,3.694504,3.465032,5.599125,1.835779,0.665470,4.704182,5.071338,2.707773,1.032625,1.032625,1.147362,1.504087,10.454636,3.969803,3.723231,6.016347,1.972573,0.715058,5.054718,5.449232,2.909545,1.109572,1.109572,1.232858,0.884511,6.148080,2.334530,2.189528,3.538046,1.160015,0.420505,2.972539,3.204542,1.711022,0.652508,0.652508,0.725009,0.060497,0.420505,0.159673,0.149755,0.241989,0.079341,0.028761,0.203310,0.219179,0.117027,0.044629,0.044629,0.049588
138,Watford,11.300000,4.166667,12.083333,2.150000,0.016667,0.066667,2.733333,10.650000,1.366667,0.266667,0.100000,7.750000,1.366667,0.100000,0.050000,2.383333,5.466667,3.416667,1.533333,1.266667,0.016667,0.233333,0.100000,0.883333,0.150000,0.816667,0.150000,0.233333,0.150000,4.933333,0.016667,0.133333,0.000000,37.150000,5.016667,1.950000,0.700000,0.083333,9.566667,0.766667,0.766667,0.200000,7.200000,2.483333,1.616667,0.150000,3.150000,0.750000,0.733333,1.250000,1.166667,0.150000,1.350000,1.083333,0.516667,0.316667,0.250000,0.383333,3.633333,4.350000,3.150000,0.150000,44.900000,0.080921,0.179310,41.056667,49.155000,35.595000,1.695000,15.138889,18.125000,13.125000,0.625000,43.902778,52.562500,38.062500,1.812500,7.811667,9.352500,6.772500,0.322500,0.060556,0.072500,0.052500,0.002500,0.242222,0.290000,0.210000,0.010000,9.931111,11.890000,8.610000,0.410000,38.695000,46.327500,33.547500,1.597500,4.965556,5.945000,4.305000,0.205000,0.968889,1.160000,0.840000,0.040000,0.363333,0.435000,0.315000,0.015000,0.545000,11.445000,2.725000,2.664444,4.541667,4.238889,0.545000,4.905000,3.936111,1.877222,1.150556,0.908333,1.392778,0.652500,13.702500,3.262500,3.190000,5.437500,5.075000,0.652500,5.872500,4.712500,2.247500,1.377500,1.087500,1.667500,0.472500,9.922500,2.362500,2.310000,3.937500,3.675000,0.472500,4.252500,3.412500,1.627500,0.997500,0.787500,1.207500,0.022500,0.472500,0.112500,0.110000,0.187500,0.175000,0.022500,0.202500,0.162500,0.077500,0.047500,0.037500,0.057500
139,Werder Bremen,12.395722,4.598930,14.144385,2.005348,0.000000,0.053476,2.620321,14.513369,2.240642,0.588235,0.181818,8.791444,2.240642,0.106952,0.032086,2.828877,8.235294,3.882353,1.582888,1.866310,0.128342,0.139037,0.165775,0.983957,0.267380,0.983957,0.224599,0.374332,0.144385,4.732620,0.181818,0.080214,0.016043,44.550802,5.786096,2.219251,0.577540,0.208556,10.224599,0.796791,1.112299,0.262032,6.962567,3.331551,2.101604,0.320856,2.941176,0.935829,1.090909,1.342246,0.705882,0.203209,1.609626,1.224599,0.866310,0.272727,0.358289,0.427807,4.299465,4.925134,2.941176,0.203209,53.342246,0.080602,0.141777,53.294976,61.050588,36.458006,2.518917,19.772942,22.650347,13.526266,0.934542,60.813292,69.662987,41.601132,2.874260,8.621922,9.876605,5.898081,0.407504,0.000000,0.000000,0.000000,0.000000,0.229918,0.263376,0.157282,0.010867,11.265978,12.905431,7.706826,0.532472,62.399725,71.480283,42.686379,2.949241,9.633561,11.035460,6.590123,0.455318,2.529097,2.897137,1.730104,0.119534,0.781721,0.895479,0.534759,0.036947,1.379508,12.645486,4.023564,4.690326,5.770940,3.034917,0.873688,6.920530,5.265121,3.724670,1.172581,1.540450,1.839343,1.580257,14.485687,4.609082,5.372873,6.610741,3.476565,1.000829,7.927622,6.031313,4.266693,1.343218,1.764620,2.107009,0.943693,8.650519,2.752438,3.208556,3.947782,2.076125,0.597672,4.734193,3.601762,2.547971,0.802139,1.053791,1.258257,0.065201,0.597672,0.190168,0.221682,0.272756,0.143441,0.041294,0.327090,0.248849,0.176042,0.055421,0.072807,0.086934
140,West Brom,6.731132,2.820755,6.264151,1.084906,0.000000,0.028302,1.679245,6.202830,1.146226,0.113208,0.023585,4.910377,1.146226,0.028302,0.023585,1.372642,3.396226,2.018868,0.632075,0.801887,0.047170,0.066038,0.028302,0.481132,0.132075,0.518868,0.103774,0.358491,0.066038,2.773585,0.033019,0.056604,0.009434,21.183962,2.990566,1.438679,0.301887,0.179245,5.580189,0.349057,0.683962,0.117925,3.759434,1.731132,1.240566,0.136792,1.849057,0.433962,0.443396,0.632075,0.514151,0.056604,0.797170,0.811321,0.495283,0.165094,0.165094,0.207547,1.981132,2.820755,1.849057,0.056604,26.094340,0.075922,0.173193,13.335262,18.986873,12.446244,0.381007,5.588288,7.956657,5.215735,0.159665,12.410110,17.669633,11.582770,0.354575,2.149341,3.060253,2.006052,0.061410,0.000000,0.000000,0.000000,0.000000,0.056070,0.079833,0.052332,0.001602,3.326807,4.736739,3.105020,0.095052,12.288626,17.496663,11.469384,0.351104,2.270826,3.233224,2.119438,0.064881,0.224279,0.319331,0.209327,0.006408,0.046725,0.066527,0.043610,0.001335,0.271004,3.663225,0.859737,0.878426,1.252225,1.018601,0.112140,1.579299,1.607334,0.981221,0.327074,0.327074,0.411178,0.385858,5.215735,1.224101,1.250712,1.782930,1.450294,0.159665,2.248621,2.288537,1.397072,0.465691,0.465691,0.585440,0.252937,3.419010,0.802421,0.819865,1.168743,0.950694,0.104664,1.474012,1.500178,0.915806,0.305269,0.305269,0.383766,0.007743,0.104664,0.024564,0.025098,0.035778,0.029103,0.003204,0.045123,0.045924,0.028035,0.009345,0.009345,0.011748


In [312]:
# 6. Feature Selection: Remove low variance features
variances = team_aggregated.drop(columns=['event_team']).var(axis=0)
low_variance_features = variances[variances < 0.001].index.tolist()
team_aggregated.drop(columns=low_variance_features, inplace=True, errors='ignore')

In [313]:
team_aggregated

Unnamed: 0,event_team,event_type_1,event_type_2,event_type_3,event_type_4,event_type_6,event_type_7,event_type_8,event_type_9,event_type_10,event_type_11,event_type2_12.0,event_type2_13.0,event_type2_14.0,location_1.0,location_2.0,location_3.0,location_4.0,location_5.0,location_6.0,location_7.0,location_8.0,location_9.0,location_10.0,location_11.0,location_12.0,location_13.0,location_14.0,location_15.0,location_16.0,location_17.0,assist_method_0,assist_method_1,assist_method_2,assist_method_3,assist_method_4,situation_1.0,situation_2.0,situation_3.0,situation_4.0,bodypart_1.0,bodypart_2.0,bodypart_3.0,shot_place_1.0,shot_place_2.0,shot_place_3.0,shot_place_4.0,shot_place_5.0,shot_place_6.0,shot_place_7.0,shot_place_8.0,shot_place_9.0,shot_place_10.0,shot_place_11.0,shot_place_12.0,shot_place_13.0,shot_outcome_1.0,shot_outcome_2.0,shot_outcome_3.0,shot_outcome_4.0,total_events,cards_per_foul,event_type_1_x_shot_outcome_1.0,event_type_1_x_shot_outcome_2.0,event_type_1_x_shot_outcome_3.0,event_type_1_x_shot_outcome_4.0,event_type_2_x_shot_outcome_1.0,event_type_2_x_shot_outcome_2.0,event_type_2_x_shot_outcome_3.0,event_type_2_x_shot_outcome_4.0,event_type_3_x_shot_outcome_1.0,event_type_3_x_shot_outcome_2.0,event_type_3_x_shot_outcome_3.0,event_type_3_x_shot_outcome_4.0,event_type_4_x_shot_outcome_1.0,event_type_4_x_shot_outcome_2.0,event_type_4_x_shot_outcome_3.0,event_type_4_x_shot_outcome_4.0,event_type_5_x_shot_outcome_1.0,event_type_5_x_shot_outcome_2.0,event_type_6_x_shot_outcome_1.0,event_type_6_x_shot_outcome_2.0,event_type_6_x_shot_outcome_3.0,event_type_7_x_shot_outcome_1.0,event_type_7_x_shot_outcome_2.0,event_type_7_x_shot_outcome_3.0,event_type_7_x_shot_outcome_4.0,event_type_8_x_shot_outcome_1.0,event_type_8_x_shot_outcome_2.0,event_type_8_x_shot_outcome_3.0,event_type_8_x_shot_outcome_4.0,event_type_9_x_shot_outcome_1.0,event_type_9_x_shot_outcome_2.0,event_type_9_x_shot_outcome_3.0,event_type_9_x_shot_outcome_4.0,event_type_10_x_shot_outcome_1.0,event_type_10_x_shot_outcome_2.0,event_type_10_x_shot_outcome_3.0,event_type_10_x_shot_outcome_4.0,event_type_11_x_shot_outcome_1.0,event_type_11_x_shot_outcome_2.0,event_type_11_x_shot_outcome_3.0,shot_outcome_1.0_x_shot_place_1.0,shot_outcome_1.0_x_shot_place_2.0,shot_outcome_1.0_x_shot_place_3.0,shot_outcome_1.0_x_shot_place_4.0,shot_outcome_1.0_x_shot_place_5.0,shot_outcome_1.0_x_shot_place_6.0,shot_outcome_1.0_x_shot_place_7.0,shot_outcome_1.0_x_shot_place_8.0,shot_outcome_1.0_x_shot_place_9.0,shot_outcome_1.0_x_shot_place_10.0,shot_outcome_1.0_x_shot_place_11.0,shot_outcome_1.0_x_shot_place_12.0,shot_outcome_1.0_x_shot_place_13.0,shot_outcome_2.0_x_shot_place_1.0,shot_outcome_2.0_x_shot_place_2.0,shot_outcome_2.0_x_shot_place_3.0,shot_outcome_2.0_x_shot_place_4.0,shot_outcome_2.0_x_shot_place_5.0,shot_outcome_2.0_x_shot_place_6.0,shot_outcome_2.0_x_shot_place_7.0,shot_outcome_2.0_x_shot_place_8.0,shot_outcome_2.0_x_shot_place_9.0,shot_outcome_2.0_x_shot_place_10.0,shot_outcome_2.0_x_shot_place_11.0,shot_outcome_2.0_x_shot_place_12.0,shot_outcome_2.0_x_shot_place_13.0,shot_outcome_3.0_x_shot_place_1.0,shot_outcome_3.0_x_shot_place_2.0,shot_outcome_3.0_x_shot_place_3.0,shot_outcome_3.0_x_shot_place_4.0,shot_outcome_3.0_x_shot_place_5.0,shot_outcome_3.0_x_shot_place_6.0,shot_outcome_3.0_x_shot_place_7.0,shot_outcome_3.0_x_shot_place_8.0,shot_outcome_3.0_x_shot_place_9.0,shot_outcome_3.0_x_shot_place_10.0,shot_outcome_3.0_x_shot_place_11.0,shot_outcome_3.0_x_shot_place_12.0,shot_outcome_3.0_x_shot_place_13.0,shot_outcome_4.0_x_shot_place_1.0,shot_outcome_4.0_x_shot_place_2.0,shot_outcome_4.0_x_shot_place_3.0,shot_outcome_4.0_x_shot_place_4.0,shot_outcome_4.0_x_shot_place_5.0,shot_outcome_4.0_x_shot_place_6.0,shot_outcome_4.0_x_shot_place_8.0,shot_outcome_4.0_x_shot_place_9.0,shot_outcome_4.0_x_shot_place_10.0,shot_outcome_4.0_x_shot_place_13.0
0,AC Ajaccio,9.701754,3.938596,13.973684,2.087719,0.105263,2.692982,13.456140,3.201754,0.842105,0.131579,6.684211,3.201754,0.149123,2.500000,7.456140,2.947368,1.859649,1.640351,0.078947,0.078947,0.087719,0.622807,0.114035,0.807018,0.105263,0.236842,0.131579,4.008772,0.210526,0.175439,43.447368,4.298246,1.736842,0.377193,0.271930,8.008772,0.754386,0.649123,0.289474,4.964912,3.280702,1.456140,0.166667,2.149123,0.754386,0.701754,1.105263,0.780702,0.184211,1.192982,1.070175,0.789474,0.219298,0.245614,0.263158,3.245614,4.087719,2.149123,0.184211,50.131579,0.149404,31.488150,39.658049,20.850262,1.787165,12.783164,16.099877,8.464528,0.725531,45.353186,57.120499,30.031163,2.574100,6.775931,8.534010,4.486765,0.384580,0.000000,0.000000,0.341644,0.430286,0.226223,8.740382,11.008156,5.787550,0.496076,43.673438,55.004925,28.918898,2.478763,10.391659,13.087873,6.880963,0.589797,2.733149,3.442290,1.809788,0.155125,0.427054,0.537858,0.282779,0.540936,6.975223,2.448446,2.277624,3.587258,2.533857,0.597876,3.871961,3.473376,2.562327,0.711757,0.797168,0.854109,0.681287,8.785011,3.083718,2.868575,4.518006,3.191290,0.753001,4.876577,4.374577,3.227147,0.896430,1.004001,1.075716,0.358187,4.618729,1.621268,1.508156,2.375346,1.677824,0.395891,2.563866,2.299938,1.696676,0.471299,0.527855,0.565559,0.030702,0.395891,0.138966,0.129271,0.203601,0.143813,0.219760,0.197138,0.145429,0.048476
1,AC Milan,14.595238,5.833333,12.747619,2.376190,0.085714,2.823810,13.523810,2.585714,0.609524,0.200000,10.476190,2.585714,0.185714,3.204762,7.066667,3.733333,1.638095,1.614286,0.047619,0.133333,0.166667,1.023810,0.161905,1.219048,0.138095,0.442857,0.242857,6.976190,0.109524,0.090476,44.919048,7.409524,2.271429,0.290476,0.504762,12.452381,0.685714,1.042857,0.414286,9.219048,3.752381,1.623810,0.328571,3.723810,1.271429,1.214286,1.466667,1.042857,0.228571,1.700000,1.452381,1.066667,0.300000,0.323810,0.342857,4.904762,5.695238,3.723810,0.228571,55.395238,0.187523,71.586168,83.123356,54.349887,3.336054,28.611111,33.222222,21.722222,1.333333,62.524036,72.600726,47.469705,2.913741,11.654649,13.532971,8.848481,0.543129,0.070068,0.081361,0.420408,0.488163,0.319184,13.850113,16.082268,10.515329,0.645442,66.331066,77.021315,50.360091,3.091156,12.682313,14.726259,9.628707,0.591020,2.989569,3.471383,2.269751,0.139320,0.980952,1.139048,0.744762,1.611565,18.264399,6.236054,5.955782,7.193651,5.114966,1.121088,8.338095,7.123583,5.231746,1.471429,1.588209,1.681633,1.871293,21.207982,7.241088,6.915646,8.353016,5.939320,1.301769,9.681905,8.271655,6.074921,1.708571,1.844172,1.952653,1.223537,13.866757,4.734558,4.521769,5.461587,3.883401,0.851156,6.330476,5.408390,3.972063,1.117143,1.205805,1.276735,0.075102,0.851156,0.290612,0.277551,0.335238,0.238367,0.388571,0.331973,0.243810,0.078367
2,AJ Auxerre,11.736842,4.684211,14.684211,2.342105,0.078947,2.736842,12.342105,2.473684,0.921053,0.210526,7.947368,2.473684,0.131579,2.842105,6.236842,3.236842,1.842105,1.421053,0.157895,0.105263,0.105263,1.263158,0.263158,0.868421,0.263158,0.473684,0.026316,4.552632,0.131579,0.210526,44.263158,4.578947,2.421053,0.421053,0.526316,9.578947,0.473684,1.078947,0.605263,5.236842,4.684211,1.815789,0.157895,2.526316,0.736842,1.026316,1.473684,0.947368,0.157895,1.263158,1.236842,1.263158,0.263158,0.210526,0.421053,4.105263,4.947368,2.526316,0.157895,52.210526,0.159498,48.182825,58.066482,29.650970,1.853186,19.229917,23.174515,11.833795,0.739612,60.282548,72.648199,37.096953,2.318560,9.614958,11.587258,5.916898,0.369806,0.000000,0.000000,0.324100,0.390582,0.199446,11.235457,13.540166,6.914127,0.432133,50.667590,61.060942,31.180055,1.948753,10.155125,12.238227,6.249307,0.390582,3.781163,4.556787,2.326870,0.145429,0.864266,1.041551,0.531856,0.648199,10.371191,3.024931,4.213296,6.049861,3.889197,0.648199,5.185596,5.077562,5.185596,1.080332,0.864266,1.728532,0.781163,12.498615,3.645429,5.077562,7.290859,4.686981,0.781163,6.249307,6.119114,6.249307,1.301939,1.041551,2.083102,0.398892,6.382271,1.861496,2.592798,3.722992,2.393352,0.398892,3.191136,3.124654,3.191136,0.664820,0.531856,1.063712,0.024931,0.398892,0.116343,0.162050,0.232687,0.149584,0.199446,0.195291,0.199446,0.066482
3,AS Monaco,12.333333,5.385185,12.340741,1.800000,0.059259,2.851852,12.859259,2.111111,0.785185,0.214815,9.029630,2.111111,0.081481,3.014815,6.281481,4.133333,1.637037,1.925926,0.059259,0.140741,0.118519,0.970370,0.237037,0.948148,0.325926,0.414815,0.192593,4.496296,0.081481,0.066667,41.711111,5.585185,2.688889,0.362963,0.392593,10.274074,0.637037,1.051852,0.370370,5.903704,4.251852,2.177778,0.251852,2.837037,1.059259,1.140741,1.540741,1.066667,0.185185,1.274074,1.385185,0.577778,0.281481,0.303704,0.303704,4.607407,4.666667,2.837037,0.185185,50.740741,0.145858,56.824691,57.555556,34.990123,2.283951,24.811742,25.130864,15.277970,0.997257,56.858820,57.590123,35.011139,2.285322,8.293333,8.400000,5.106667,0.333333,0.000000,0.000000,0.273032,0.276543,0.168121,13.139643,13.308642,8.090809,0.528121,59.247846,60.009877,36.482195,2.381344,9.726749,9.851852,5.989300,0.390947,3.617668,3.664198,2.227599,0.145405,0.989739,1.002469,0.609438,1.160384,13.071385,4.880439,5.255857,7.098820,4.914568,0.853224,5.870178,6.382112,2.662058,1.296900,1.399287,1.399287,1.175309,13.239506,4.943210,5.323457,7.190123,4.977778,0.864198,5.945679,6.464198,2.696296,1.313580,1.417284,1.417284,0.714513,8.048779,3.005158,3.236324,4.371139,3.026173,0.525377,3.614595,3.929822,1.639177,0.798573,0.861619,0.861619,0.046639,0.525377,0.196159,0.211248,0.285322,0.197531,0.235940,0.256516,0.106996,0.056241
4,AS Nancy Lorraine,11.020833,3.708333,13.125000,1.666667,0.072917,2.416667,12.229167,2.645833,0.697917,0.125000,8.239583,2.645833,0.083333,2.489583,6.437500,2.718750,1.791667,1.510417,0.072917,0.114583,0.041667,0.885417,0.156250,0.854167,0.145833,0.291667,0.104167,5.229167,0.177083,0.166667,39.468750,5.447917,2.010417,0.437500,0.343750,9.104167,0.718750,0.895833,0.302083,6.781250,2.489583,1.750000,0.354167,2.052083,0.906250,0.791667,1.145833,0.979167,0.187500,1.385417,1.406250,0.854167,0.281250,0.322917,0.302083,3.614583,5.156250,2.052083,0.187500,47.708333,0.126984,39.835720,56.826172,22.615668,2.066406,13.404080,19.121094,7.609809,0.695312,47.441406,67.675781,26.933594,2.460938,6.024306,8.593750,3.420139,0.312500,0.000000,0.000000,0.263563,0.375977,0.149631,8.735243,12.460938,4.959201,0.453125,44.203342,63.056641,25.095269,2.292969,9.563585,13.642578,5.429470,0.496094,2.522678,3.598633,1.432183,0.130859,0.451823,0.644531,0.256510,1.280165,7.417426,3.275716,2.861545,4.141710,3.539280,0.677734,5.007704,5.083008,3.087457,1.016602,1.167209,1.091905,1.826172,10.581055,4.672852,4.082031,5.908203,5.048828,0.966797,7.143555,7.250977,4.404297,1.450195,1.665039,1.557617,0.726780,4.211046,1.859701,1.624566,2.351345,2.009332,0.384766,2.842990,2.885742,1.752821,0.577148,0.662652,0.619900,0.066406,0.384766,0.169922,0.148438,0.214844,0.183594,0.259766,0.263672,0.160156,0.056641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,Villarreal,10.812865,4.666667,11.959064,2.292398,0.029240,2.807018,12.192982,2.719298,0.608187,0.140351,7.929825,2.719298,0.093567,2.783626,6.725146,2.994152,1.374269,1.309942,0.064327,0.204678,0.152047,0.959064,0.198830,1.116959,0.269006,0.374269,0.140351,4.146199,0.052632,0.058480,40.304094,5.403509,1.725146,0.251462,0.549708,9.228070,0.485380,0.853801,0.245614,5.479532,3.923977,1.409357,0.356725,2.479532,0.941520,0.883041,1.426901,0.467836,0.169591,1.198830,1.292398,0.690058,0.263158,0.263158,0.292398,3.923977,4.216374,2.479532,0.169591,48.233918,0.192176,42.429431,45.591088,26.810848,1.833761,18.311891,19.676413,11.571150,0.791423,46.927089,50.423891,29.652885,2.028145,8.995315,9.665607,5.684074,0.388769,0.022947,0.024657,0.114736,0.123286,0.072501,11.014671,11.835437,6.960090,0.476044,47.844978,51.410177,30.232892,2.067816,10.670463,11.465579,6.742587,0.461168,2.386512,2.564345,1.508020,0.103143,0.550734,0.591772,0.348005,1.399781,9.729626,3.694504,3.465032,5.599125,1.835779,0.665470,4.704182,5.071338,2.707773,1.032625,1.032625,1.147362,1.504087,10.454636,3.969803,3.723231,6.016347,1.972573,0.715058,5.054718,5.449232,2.909545,1.109572,1.109572,1.232858,0.884511,6.148080,2.334530,2.189528,3.538046,1.160015,0.420505,2.972539,3.204542,1.711022,0.652508,0.652508,0.725009,0.060497,0.420505,0.159673,0.149755,0.241989,0.079341,0.203310,0.219179,0.117027,0.049588
138,Watford,11.300000,4.166667,12.083333,2.150000,0.066667,2.733333,10.650000,1.366667,0.266667,0.100000,7.750000,1.366667,0.100000,2.383333,5.466667,3.416667,1.533333,1.266667,0.016667,0.233333,0.100000,0.883333,0.150000,0.816667,0.150000,0.233333,0.150000,4.933333,0.016667,0.133333,37.150000,5.016667,1.950000,0.700000,0.083333,9.566667,0.766667,0.766667,0.200000,7.200000,2.483333,1.616667,0.150000,3.150000,0.750000,0.733333,1.250000,1.166667,0.150000,1.350000,1.083333,0.516667,0.316667,0.250000,0.383333,3.633333,4.350000,3.150000,0.150000,44.900000,0.179310,41.056667,49.155000,35.595000,1.695000,15.138889,18.125000,13.125000,0.625000,43.902778,52.562500,38.062500,1.812500,7.811667,9.352500,6.772500,0.322500,0.060556,0.072500,0.242222,0.290000,0.210000,9.931111,11.890000,8.610000,0.410000,38.695000,46.327500,33.547500,1.597500,4.965556,5.945000,4.305000,0.205000,0.968889,1.160000,0.840000,0.040000,0.363333,0.435000,0.315000,0.545000,11.445000,2.725000,2.664444,4.541667,4.238889,0.545000,4.905000,3.936111,1.877222,1.150556,0.908333,1.392778,0.652500,13.702500,3.262500,3.190000,5.437500,5.075000,0.652500,5.872500,4.712500,2.247500,1.377500,1.087500,1.667500,0.472500,9.922500,2.362500,2.310000,3.937500,3.675000,0.472500,4.252500,3.412500,1.627500,0.997500,0.787500,1.207500,0.022500,0.472500,0.112500,0.110000,0.187500,0.175000,0.202500,0.162500,0.077500,0.057500
139,Werder Bremen,12.395722,4.598930,14.144385,2.005348,0.053476,2.620321,14.513369,2.240642,0.588235,0.181818,8.791444,2.240642,0.106952,2.828877,8.235294,3.882353,1.582888,1.866310,0.128342,0.139037,0.165775,0.983957,0.267380,0.983957,0.224599,0.374332,0.144385,4.732620,0.181818,0.080214,44.550802,5.786096,2.219251,0.577540,0.208556,10.224599,0.796791,1.112299,0.262032,6.962567,3.331551,2.101604,0.320856,2.941176,0.935829,1.090909,1.342246,0.705882,0.203209,1.609626,1.224599,0.866310,0.272727,0.358289,0.427807,4.299465,4.925134,2.941176,0.203209,53.342246,0.141777,53.294976,61.050588,36.458006,2.518917,19.772942,22.650347,13.526266,0.934542,60.813292,69.662987,41.601132,2.874260,8.621922,9.876605,5.898081,0.407504,0.000000,0.000000,0.229918,0.263376,0.157282,11.265978,12.905431,7.706826,0.532472,62.399725,71.480283,42.686379,2.949241,9.633561,11.035460,6.590123,0.455318,2.529097,2.897137,1.730104,0.119534,0.781721,0.895479,0.534759,1.379508,12.645486,4.023564,4.690326,5.770940,3.034917,0.873688,6.920530,5.265121,3.724670,1.172581,1.540450,1.839343,1.580257,14.485687,4.609082,5.372873,6.610741,3.476565,1.000829,7.927622,6.031313,4.266693,1.343218,1.764620,2.107009,0.943693,8.650519,2.752438,3.208556,3.947782,2.076125,0.597672,4.734193,3.601762,2.547971,0.802139,1.053791,1.258257,0.065201,0.597672,0.190168,0.221682,0.272756,0.143441,0.327090,0.248849,0.176042,0.086934
140,West Brom,6.731132,2.820755,6.264151,1.084906,0.028302,1.679245,6.202830,1.146226,0.113208,0.023585,4.910377,1.146226,0.028302,1.372642,3.396226,2.018868,0.632075,0.801887,0.047170,0.066038,0.028302,0.481132,0.132075,0.518868,0.103774,0.358491,0.066038,2.773585,0.033019,0.056604,21.183962,2.990566,1.438679,0.301887,0.179245,5.580189,0.349057,0.683962,0.117925,3.759434,1.731132,1.240566,0.136792,1.849057,0.433962,0.443396,0.632075,0.514151,0.056604,0.797170,0.811321,0.495283,0.165094,0.165094,0.207547,1.981132,2.820755,1.849057,0.056604,26.094340,0.173193,13.335262,18.986873,12.446244,0.381007,5.588288,7.956657,5.215735,0.159665,12.410110,17.669633,11.582770,0.354575,2.149341,3.060253,2.006052,0.061410,0.000000,0.000000,0.056070,0.079833,0.052332,3.326807,4.736739,3.105020,0.095052,12.288626,17.496663,11.469384,0.351104,2.270826,3.233224,2.119438,0.064881,0.224279,0.319331,0.209327,0.006408,0.046725,0.066527,0.043610,0.271004,3.663225,0.859737,0.878426,1.252225,1.018601,0.112140,1.579299,1.607334,0.981221,0.327074,0.327074,0.411178,0.385858,5.215735,1.224101,1.250712,1.782930,1.450294,0.159665,2.248621,2.288537,1.397072,0.465691,0.465691,0.585440,0.252937,3.419010,0.802421,0.819865,1.168743,0.950694,0.104664,1.474012,1.500178,0.915806,0.305269,0.305269,0.383766,0.007743,0.104664,0.024564,0.025098,0.035778,0.029103,0.045123,0.045924,0.028035,0.011748


In [314]:
# 8. Scale features
scaler = StandardScaler()
features_to_scale = [col for col in team_aggregated.columns if col != 'event_team']
scaled_features = scaler.fit_transform(team_aggregated[features_to_scale])

# Create final scaled DataFrame
final_team_cluster_data = pd.DataFrame(scaled_features, columns=features_to_scale)
final_team_cluster_data['event_team'] = team_aggregated['event_team']

# Save processed data
output_file = 'data/final_team_cluster_data3.csv'
final_team_cluster_data.to_csv(output_file, index=False)
print(f"Processed and normalized dataset saved to {output_file}")

Processed and normalized dataset saved to data/final_team_cluster_data3.csv
