In [17]:
import os
from datetime import date, timedelta

import joblib
import numpy as np # Fundamental package for scientific computing with Python
import pandas as pd
import plotly.express as px
import yfinance as yf #Alternative package if webreader does not work: pip install yfinance
from matplotlib import pyplot as plt
from plotly import graph_objects as go
# Train the model
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from lib.Utility import exportExcelWithTimeStamp

In [18]:
# Path of the results workbook to load; the export cell derives its output
# name from this same string.
filename = 'data/PremierLeague/23-24.xlsx'

# Read the workbook into a DataFrame.
data = pd.read_excel(io=filename)

# Show the last five rows as a quick check of what was loaded.
data.tail(n=5)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
375,E0,2024-05-19,16:00:00,Crystal Palace,Aston Villa,5,0,H,2,0,...,2.78,-0.75,1.73,2.08,1.78,2.16,1.93,2.21,1.81,2.05
376,E0,2024-05-19,16:00:00,Liverpool,Wolves,2,0,H,2,0,...,5.17,-2.75,2.07,1.86,2.04,1.85,2.1,1.89,2.04,1.82
377,E0,2024-05-19,16:00:00,Luton,Fulham,2,4,A,1,2,...,2.69,0.25,2.0,1.93,1.99,1.93,2.02,1.94,1.96,1.91
378,E0,2024-05-19,16:00:00,Man City,West Ham,3,1,H,2,1,...,4.82,-3.0,2.03,1.9,1.99,1.9,2.05,1.99,1.96,1.91
379,E0,2024-05-19,16:00:00,Sheffield United,Tottenham,0,3,A,0,1,...,3.81,1.5,2.06,1.87,2.05,1.88,2.08,1.89,2.01,1.86


In [19]:
# Points awarded to the (home, away) side for each full-time result code in 'FTR'.
# Any other code awards no points to either side.
POINTS_BY_RESULT = {'H': (3, 0), 'A': (0, 3), 'D': (1, 1)}


def cumulative_team_stats(matches):
    """Compute each team's running totals *before* every match, in row order.

    Walks ``matches`` once from top to bottom. For every row it records the
    goals scored, points and goals conceded that the home and the away team
    had accumulated in the rows above it; the current row is added to the
    running totals only afterwards, so a row never includes its own result.

    NOTE(review): the totals follow row order, so they are only "pre-match"
    figures if the rows are sorted chronologically -- confirm for the input.

    Parameters
    ----------
    matches : pandas.DataFrame
        Must provide the columns 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'
        and 'FTR'. It is not modified.

    Returns
    -------
    columns : dict[str, list]
        New column name -> one value per row of ``matches``, in row order.
    totals : dict[str, dict]
        Final per-team totals after the last row, under the keys
        'goals', 'points' and 'conceded'.
    """
    scored, points, conceded = {}, {}, {}
    columns = {
        'HomeGoalsCumulative': [],
        'AwayGoalsCumulative': [],
        'HomePointsCumulative': [],
        'AwayPointsCumulative': [],
        'HomeGoalsConcededCumulative': [],
        'AwayGoalsConcededCumulative': [],
    }

    fixtures = zip(
        matches['HomeTeam'], matches['AwayTeam'],
        matches['FTHG'], matches['FTAG'], matches['FTR'],
    )
    for home, away, home_goals, away_goals, result in fixtures:
        # A team enters every tally at zero the first time it appears.
        for tally in (scored, points, conceded):
            tally.setdefault(home, 0)
            tally.setdefault(away, 0)

        # Record the totals as they stood before this match ...
        columns['HomeGoalsCumulative'].append(scored[home])
        columns['AwayGoalsCumulative'].append(scored[away])
        columns['HomePointsCumulative'].append(points[home])
        columns['AwayPointsCumulative'].append(points[away])
        columns['HomeGoalsConcededCumulative'].append(conceded[home])
        columns['AwayGoalsConcededCumulative'].append(conceded[away])

        # ... and only then fold this match into the running totals.
        scored[home] += home_goals
        scored[away] += away_goals
        conceded[home] += away_goals  # the home side concedes the away goals
        conceded[away] += home_goals  # the away side concedes the home goals
        home_points, away_points = POINTS_BY_RESULT.get(result, (0, 0))
        points[home] += home_points
        points[away] += away_points

    return columns, {'goals': scored, 'points': points, 'conceded': conceded}


# One pass over the fixtures instead of three separate row-by-row loops;
# each new column is assigned once rather than cell by cell.
cumulative_columns, season_totals = cumulative_team_stats(data)
for column_name, values in cumulative_columns.items():
    data[column_name] = values

# Final per-team totals, kept under their original names for any later cell
# that reads them.
goals_cumulative = season_totals['goals']
points_cumulative = season_totals['points']
goals_conceded_cumulative = season_totals['conceded']

# Total goals scored in each match (home + away).
data['MatchGoal'] = data['FTHG'] + data['FTAG']

data.tail(35)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,MaxCAHA,AvgCAHH,AvgCAHA,HomeGoalsCumulative,AwayGoalsCumulative,HomePointsCumulative,AwayPointsCumulative,HomeGoalsConcededCumulative,AwayGoalsConcededCumulative,MatchGoal
345,E0,2024-04-28,16:30:00,Nott'm Forest,Man City,0,2,A,0,1,...,1.93,2.01,1.87,42,80,30,76,60,32,2
346,E0,2024-05-02,19:30:00,Chelsea,Tottenham,2,0,H,1,0,...,2.06,1.87,1.99,63,67,48,60,59,52,2
347,E0,2024-05-03,20:00:00,Luton,Everton,1,1,D,1,1,...,1.87,2.04,1.83,48,37,25,44,77,48,2
348,E0,2024-05-04,12:30:00,Arsenal,Bournemouth,3,0,H,1,0,...,2.16,1.8,2.07,85,52,80,48,28,60,3
349,E0,2024-05-04,15:00:00,Brentford,Fulham,0,0,D,0,0,...,2.12,1.81,2.06,52,51,35,43,60,55,0
350,E0,2024-05-04,15:00:00,Burnley,Newcastle,1,4,A,0,3,...,2.05,1.87,2.01,38,74,24,53,70,55,5
351,E0,2024-05-04,15:00:00,Sheffield United,Nott'm Forest,1,3,A,1,1,...,1.85,2.04,1.83,34,42,16,30,97,62,4
352,E0,2024-05-04,17:30:00,Man City,Wolves,5,1,H,3,0,...,2.07,1.86,2.02,82,48,79,46,32,55,6
353,E0,2024-05-05,14:00:00,Brighton,Aston Villa,1,0,H,0,0,...,2.13,1.82,2.02,52,73,44,67,57,52,1
354,E0,2024-05-05,14:00:00,Chelsea,West Ham,5,0,H,3,0,...,1.93,1.99,1.87,65,56,51,49,59,65,5


In [20]:
# Write the enriched table next to the input workbook, tagging the name with '-E'.
# splitext strips whatever extension the input path has, rather than assuming
# it is exactly five characters long ('.xlsx').
resultFileName = os.path.splitext(filename)[0] + '-E.xlsx'
data.to_excel(resultFileName)

## TODO
- Add average goals per match, split by home (H) and away (A)
