In [9]:
import pandas as pd
import numpy as np

# Columns kept from the raw match file, using the source file's abbreviations:
#   FTHG = Full Time Home Team Goals
#   FTAG = Full Time Away Team Goals
#   FTR  = Full Time Result (H=Home Win, D=Draw, A=Away Win)
fields = ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "FTR"]

# Read only the columns listed above, then give the abbreviated ones readable
# names (FTHG -> Home_goals, FTAG -> Away_goals, FTR -> Result).
# "Date" is not parsed here: it stays exactly as read from the file.
df = pd.read_csv("data/Ligue1_game_stats.csv", usecols=fields).rename(
    columns={"FTHG": "Home_goals", "FTAG": "Away_goals", "FTR": "Result"}
)

df.head()

Unnamed: 0,Date,HomeTeam,AwayTeam,Home_goals,Away_goals,Result
0,15/08/2025,Rennes,Marseille,1,0,H
1,16/08/2025,Lens,Lyon,0,1,A
2,16/08/2025,Monaco,Le Havre,3,1,H
3,16/08/2025,Nice,Toulouse,0,1,A
4,17/08/2025,Brest,Lille,3,3,D


In [10]:
# Build the per-match Marseille (OM) dataset and its "recent form" features.

OM_TEAM = "Marseille"  # team name as it appears in the HomeTeam / AwayTeam columns
FORM_WINDOW = 3        # number of previous OM games summed into each form feature

# Keep only the games OM played, home or away. The explicit .copy() makes the
# result an independent frame, so the columns added below never touch `df`.
df_om_games = df[(df["HomeTeam"] == OM_TEAM) | (df["AwayTeam"] == OM_TEAM)].copy()

# True on rows where OM is the home side; computed once and reused below.
om_at_home = df_om_games["HomeTeam"] == OM_TEAM

# Re-express each match from OM's point of view:
#   OM_goals_scored / OM_goals_conceded: pick the home or away goal column
#       depending on which side OM played;
#   is_OM_win: Result is "H" when OM is at home, "A" when OM is away;
#   is_Draw:   Result is "D".
df_om_games = df_om_games.assign(
    OM_goals_scored=np.where(om_at_home, df_om_games["Home_goals"], df_om_games["Away_goals"]),
    OM_goals_conceded=np.where(om_at_home, df_om_games["Away_goals"], df_om_games["Home_goals"]),
    is_OM_win=np.where(om_at_home, df_om_games["Result"] == "H", df_om_games["Result"] == "A"),
    is_Draw=df_om_games["Result"] == "D",
)

# Form features: goals scored / conceded summed over the FORM_WINDOW games
# *before* each row. rolling(...).sum() includes the current row, so .shift()
# moves the sums down one row to exclude the game itself; the first
# FORM_WINDOW rows are therefore NaN.
# NOTE(review): this relies on the rows already being in chronological order,
# which nothing in this cell enforces -- confirm against the source file.
df_om_games = df_om_games.assign(
    **{
        f"goals_scored_last_{FORM_WINDOW}_games": lambda d: d["OM_goals_scored"].rolling(FORM_WINDOW).sum().shift(),
        f"goals_conceded_last_{FORM_WINDOW}_games": lambda d: d["OM_goals_conceded"].rolling(FORM_WINDOW).sum().shift(),
    }
)

df_om_games

Unnamed: 0,Date,HomeTeam,AwayTeam,Home_goals,Away_goals,Result,OM_goals_scored,OM_goals_conceded,is_OM_win,is_Draw,goals_scored_last_3_games,goals_conceded_last_3_games
0,15/08/2025,Rennes,Marseille,1,0,H,0,1,False,False,,
10,23/08/2025,Marseille,Paris FC,5,2,H,5,2,True,False,,
26,31/08/2025,Lyon,Marseille,1,0,H,0,1,False,False,,
27,12/09/2025,Marseille,Lorient,4,0,H,4,0,True,False,5.0,4.0
44,22/09/2025,Marseille,Paris SG,1,0,H,1,0,True,False,9.0,3.0
45,26/09/2025,Strasbourg,Marseille,1,2,A,2,1,True,False,5.0,1.0
55,04/10/2025,Metz,Marseille,0,3,A,3,0,True,False,7.0,1.0
66,18/10/2025,Marseille,Le Havre,6,2,H,6,2,True,False,6.0,1.0
75,25/10/2025,Lens,Marseille,2,1,H,1,2,False,False,11.0,3.0
85,29/10/2025,Marseille,Angers,2,2,D,2,2,False,True,10.0,4.0


In [None]:
# TODO: add the number of rest days before each game; this also requires adding the matches played in the Champions League.
# TODO: add recent form: points taken over the last 5 games, goal difference, winning/losing streaks.
# TODO: check whether injured players can be added.
