In [29]:
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split

from lib.Utility import exportExcelWithTimeStamp

from datetime import date, timedelta
import yfinance as yf #Alternative package if webreader does not work: pip install yfinance
import numpy as np # Fundamental package for scientific computing with Python
import joblib
import plotly.express as px
from plotly import graph_objects as go
# Train the model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

In [30]:
# Load the season's match results workbook into a DataFrame.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the
# workbook was saved with its index — confirm whether that column is wanted.
filename = 'data/19-20.xlsx'
data = pd.read_excel(io=filename)

# Preview the last rows as a quick sanity check on the load.
data.tail()

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
375,I1,2020-08-02,17:00:00,Spal,Fiorentina,1,3,A,1,1,...,2.45,1.25,2.05,1.88,2.03,1.89,2.07,1.98,1.98,1.89
376,I1,2020-08-02,19:45:00,Bologna,Torino,1,1,D,1,0,...,3.14,-0.75,2.07,1.86,2.03,1.88,2.11,1.88,2.04,1.84
377,I1,2020-08-02,19:45:00,Genoa,Verona,3,0,H,3,0,...,2.55,-0.75,1.87,2.06,1.99,1.93,2.0,2.1,1.87,2.01
378,I1,2020-08-02,19:45:00,Lecce,Parma,3,4,A,2,2,...,3.34,-1.0,1.87,2.06,1.87,2.05,1.92,2.11,1.85,2.02
379,I1,2020-08-02,19:45:00,Sassuolo,Udinese,0,1,A,0,0,...,2.99,-0.5,1.93,2.0,1.92,2.0,1.93,2.11,1.86,2.02


In [31]:
# Points awarded to (home, away) for each full-time result code in 'FTR'.
# Any other value (e.g. a missing result) awards no points to either side.
POINTS_BY_RESULT = {'H': (3, 0), 'A': (0, 3), 'D': (1, 1)}


def add_cumulative_team_stats(matches):
    """Add pre-match cumulative goals, points and goals-conceded columns in place.

    For every row, the value stored is what each team had accumulated
    *before* that match; the match itself is folded into the running totals
    only after the row's values have been recorded. Rows are processed in
    the frame's current order.
    NOTE(review): this assumes rows are already in chronological order —
    confirm, or sort by 'Date'/'Time' before calling.

    Parameters
    ----------
    matches : pandas.DataFrame
        Must contain the columns 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'
        and 'FTR'. Six columns are added (or overwritten):
        'HomeGoalsCumulative', 'AwayGoalsCumulative',
        'HomePointsCumulative', 'AwayPointsCumulative',
        'HomeGoalsConcededCumulative', 'AwayGoalsConcededCumulative'.

    Returns
    -------
    tuple of dict
        ``(goals_scored, points, goals_conceded)`` — the final totals per
        team name after the last row has been processed.
    """
    goals_scored, points, goals_conceded = {}, {}, {}

    # Insertion order here fixes the order in which columns are added.
    columns = {
        'HomeGoalsCumulative': [],
        'AwayGoalsCumulative': [],
        'HomePointsCumulative': [],
        'AwayPointsCumulative': [],
        'HomeGoalsConcededCumulative': [],
        'AwayGoalsConcededCumulative': [],
    }

    fixtures = zip(
        matches['HomeTeam'], matches['AwayTeam'],
        matches['FTHG'], matches['FTAG'], matches['FTR'],
    )
    for home_team, away_team, home_goals, away_goals, result in fixtures:
        # A team starts at zero the first time it appears.
        for totals in (goals_scored, points, goals_conceded):
            totals.setdefault(home_team, 0)
            totals.setdefault(away_team, 0)

        # Record the totals as they stood before this match.
        columns['HomeGoalsCumulative'].append(goals_scored[home_team])
        columns['AwayGoalsCumulative'].append(goals_scored[away_team])
        columns['HomePointsCumulative'].append(points[home_team])
        columns['AwayPointsCumulative'].append(points[away_team])
        columns['HomeGoalsConcededCumulative'].append(goals_conceded[home_team])
        columns['AwayGoalsConcededCumulative'].append(goals_conceded[away_team])

        # Fold this match into the running totals.
        goals_scored[home_team] += home_goals
        goals_scored[away_team] += away_goals
        # Each side concedes what the opponent scored.
        goals_conceded[home_team] += away_goals
        goals_conceded[away_team] += home_goals
        home_points, away_points = POINTS_BY_RESULT.get(result, (0, 0))
        points[home_team] += home_points
        points[away_team] += away_points

    # Lists are assigned by position, so this works for any index labels.
    for column_name, values in columns.items():
        matches[column_name] = values

    return goals_scored, points, goals_conceded


# Enrich the match table; the returned dicts hold each team's final totals.
goals_cumulative, points_cumulative, goals_conceded_cumulative = (
    add_cumulative_team_stats(data)
)

# Total goals scored in the match by both sides.
data['MatchGoal'] = data['FTHG'] + data['FTAG']

data.tail(35)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,MaxCAHA,AvgCAHH,AvgCAHA,HomeGoalsCumulative,AwayGoalsCumulative,HomePointsCumulative,AwayPointsCumulative,HomeGoalsConcededCumulative,AwayGoalsConcededCumulative,MatchGoal
345,I1,2020-07-22,20:45:00,Sampdoria,Genoa,1,2,A,1,1,...,2.06,1.88,2.0,45,42,41,33,56,64,3
346,I1,2020-07-22,20:45:00,Spal,Roma,1,6,A,1,2,...,2.02,1.92,1.95,24,63,19,58,64,46,7
347,I1,2020-07-22,20:45:00,Torino,Verona,1,1,D,0,0,...,2.19,1.79,2.1,41,42,37,45,62,42,2
348,I1,2020-07-23,18:30:00,Udinese,Juventus,2,1,H,0,1,...,2.18,1.83,2.04,32,72,36,80,48,36,3
349,I1,2020-07-23,20:45:00,Lazio,Cagliari,2,1,H,0,1,...,2.01,1.94,1.93,69,49,69,42,37,50,3
350,I1,2020-07-24,20:45:00,Milan,Atalanta,1,1,D,1,1,...,1.91,2.0,1.87,55,95,59,74,44,44,2
351,I1,2020-07-25,16:15:00,Brescia,Parma,1,2,A,0,0,...,1.89,2.02,1.86,33,49,24,43,74,51,3
352,I1,2020-07-25,18:30:00,Genoa,Inter,0,3,A,0,1,...,2.04,1.9,1.98,44,74,36,73,65,36,3
353,I1,2020-07-25,20:45:00,Napoli,Sassuolo,2,0,H,1,0,...,1.93,2.03,1.84,56,64,56,48,47,60,2
354,I1,2020-07-26,16:15:00,Bologna,Lecce,3,2,H,2,1,...,2.14,1.82,2.06,48,45,43,32,58,77,5


In [32]:
# Write the enriched match table to a new workbook derived from the input path.
# NOTE(review): the suffix is appended to the full input filename, extension
# included, so the output name carries a double extension
# (e.g. '...xlsxdata.xlsx') — confirm this is the intended name.
# NOTE(review): to_excel is called without index=False, so by pandas' default
# the index is presumably written as an extra leading column — confirm.
resultFileName = f'{filename}data.xlsx'
data.to_excel(excel_writer=resultFileName)

## TODO
- Add average goals per match, split by home and away (H/A)
