In [19]:
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split

from lib.Utility import exportExcelWithTimeStamp

from datetime import date, timedelta
import yfinance as yf #Alternative package if webreader does not work: pip install yfinance
import numpy as np # Fundamental package for scientific computing with Python
import joblib
import plotly.express as px
from plotly import graph_objects as go
# Train the model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

In [20]:
# Season workbook to load; `filename` is reused further down to derive the
# name of the enriched output file.
filename = 'data/League1/23-24.xlsx'

# Read the first sheet into a DataFrame and preview the last five rows.
data = pd.read_excel(io=filename)
data.tail(5)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
547,E2,2024-04-27,12:30:00,Reading,Blackpool,3,2,H,1,1,...,2.25,0.5,1.9,1.95,1.9,1.99,1.91,2.1,1.83,1.96
548,E2,2024-04-27,12:30:00,Shrewsbury,Leyton Orient,1,3,A,0,2,...,1.74,-0.25,2.0,1.85,2.04,1.86,2.25,1.86,2.06,1.76
549,E2,2024-04-27,12:30:00,Stevenage,Cheltenham,2,1,H,1,0,...,1.77,0.0,1.85,2.0,1.86,2.03,1.89,2.09,1.85,1.97
550,E2,2024-04-27,12:30:00,Wigan,Bristol Rvs,2,0,H,1,0,...,2.03,-0.25,1.83,2.03,1.83,2.07,1.85,2.07,1.8,2.01
551,E2,2024-04-27,12:30:00,Wycombe,Charlton,1,0,H,1,0,...,2.11,-0.25,2.03,1.83,2.03,1.86,2.11,1.9,2.01,1.8


In [21]:
# Goals scored by each team *before* each match (the current match is excluded).
# Works on the DataFrame `data`.
# NOTE(review): rows are processed in their existing order, which is assumed
# to be chronological; nothing here sorts by Date -- confirm.

# team name -> goals scored so far
goals_cumulative = {}

# Pre-match totals are collected in plain lists and assigned as whole columns
# afterwards, so the frame is never modified while it is being iterated.
home_goals_before = []
away_goals_before = []

for home_team, away_team, home_goals, away_goals in zip(
    data['HomeTeam'], data['AwayTeam'], data['FTHG'], data['FTAG']
):
    # A team's first appearance starts from 0; record the totals as they
    # stand before this match is counted.
    home_goals_before.append(goals_cumulative.setdefault(home_team, 0))
    away_goals_before.append(goals_cumulative.setdefault(away_team, 0))

    # Fold the current match into the running totals.
    goals_cumulative[home_team] += home_goals
    goals_cumulative[away_team] += away_goals

data['HomeGoalsCumulative'] = home_goals_before
data['AwayGoalsCumulative'] = away_goals_before

# League points collected by each team *before* each match.
# Works on the DataFrame `data`.
# NOTE(review): rows are processed in their existing order, which is assumed
# to be chronological; nothing here sorts by Date -- confirm.

# team name -> points so far (a single dictionary shared by home and away sides)
points_cumulative = {}

# Pre-match totals are collected in plain lists and assigned as whole columns
# afterwards, so the frame is never modified while it is being iterated.
home_points_before = []
away_points_before = []

for home_team, away_team, result in zip(
    data['HomeTeam'], data['AwayTeam'], data['FTR']
):
    # A team's first appearance starts from 0; record the totals as they
    # stand before this match is counted.
    home_points_before.append(points_cumulative.setdefault(home_team, 0))
    away_points_before.append(points_cumulative.setdefault(away_team, 0))

    # Award points from the full-time result; any other value awards nothing.
    if result == 'H':  # home win
        points_cumulative[home_team] += 3
    elif result == 'A':  # away win
        points_cumulative[away_team] += 3
    elif result == 'D':  # draw
        points_cumulative[home_team] += 1
        points_cumulative[away_team] += 1

data['HomePointsCumulative'] = home_points_before
data['AwayPointsCumulative'] = away_points_before

# Goals conceded by each team *before* each match.
# Works on the DataFrame `data`.
# NOTE(review): rows are processed in their existing order, which is assumed
# to be chronological; nothing here sorts by Date -- confirm.

# team name -> goals conceded so far
goals_conceded_cumulative = {}

# Pre-match totals are collected in plain lists and assigned as whole columns
# afterwards, so the frame is never modified while it is being iterated.
home_conceded_before = []
away_conceded_before = []

for home_team, away_team, home_goals, away_goals in zip(
    data['HomeTeam'], data['AwayTeam'], data['FTHG'], data['FTAG']
):
    # A team's first appearance starts from 0; record the totals as they
    # stand before this match is counted.
    home_conceded_before.append(goals_conceded_cumulative.setdefault(home_team, 0))
    away_conceded_before.append(goals_conceded_cumulative.setdefault(away_team, 0))

    # Each side concedes what the opponent scored in this match.
    goals_conceded_cumulative[home_team] += away_goals
    goals_conceded_cumulative[away_team] += home_goals

data['HomeGoalsConcededCumulative'] = home_conceded_before
data['AwayGoalsConcededCumulative'] = away_conceded_before

# Total goals in the match: full-time home goals plus full-time away goals.
data['MatchGoal'] = data['FTHG'].add(data['FTAG'])

# Preview the last 35 rows, including the newly added columns.
data.tail(n=35)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,MaxCAHA,AvgCAHH,AvgCAHA,HomeGoalsCumulative,AwayGoalsCumulative,HomePointsCumulative,AwayPointsCumulative,HomeGoalsConcededCumulative,AwayGoalsConcededCumulative,MatchGoal
517,E2,2024-04-13,15:00:00,Shrewsbury,Wycombe,0,2,A,0,0,...,2.17,1.78,2.05,31,51,46,52,59,52,2
518,E2,2024-04-13,15:00:00,Stevenage,Burton,1,2,A,0,1,...,1.98,1.91,1.88,53,33,67,40,42,59,3
519,E2,2024-04-16,19:45:00,Bolton,Shrewsbury,2,2,D,1,2,...,2.06,1.83,1.97,79,31,82,46,46,61,4
520,E2,2024-04-16,19:45:00,Bristol Rvs,Cambridge,1,0,H,0,0,...,1.91,1.97,1.83,51,38,54,46,64,58,1
521,E2,2024-04-16,19:45:00,Burton,Cheltenham,1,2,A,1,0,...,1.93,1.96,1.84,35,35,43,38,60,60,3
522,E2,2024-04-16,19:45:00,Oxford,Lincoln,0,1,A,0,0,...,1.85,2.0,1.8,76,62,73,68,53,37,1
523,E2,2024-04-16,19:45:00,Peterboro,Fleetwood Town,4,1,H,1,1,...,1.85,2.01,1.79,80,44,77,37,55,68,5
524,E2,2024-04-16,19:45:00,Port Vale,Wycombe,1,2,A,0,1,...,1.85,2.07,1.76,40,53,40,55,70,52,3
525,E2,2024-04-16,19:45:00,Portsmouth,Barnsley,3,2,H,1,1,...,2.11,1.77,2.05,72,77,91,75,37,57,5
526,E2,2024-04-19,20:00:00,Oxford,Stevenage,1,1,D,0,1,...,1.86,2.03,1.79,76,54,73,67,54,44,2


In [22]:
# Output path: drop the last five characters of the input name (the '.xlsx'
# suffix) and append '-E.xlsx'.
resultFileName = f"{filename[:-5]}-E.xlsx"

# NOTE(review): the DataFrame index is written as an extra leading column;
# the loaded frame already carries an 'Unnamed: 0' column -- confirm this is intended.
data.to_excel(excel_writer=resultFileName)

## TODO
- Average goals per match, split by home / away
